Risk factors associated with COVID-19 mortality in patients with type 2 diabetes - Case-Control

2024-07-03

Carlos Ballon-Salcedo & Kevin J. Paez

Set global knitr options

knitr::opts_chunk$set(message = FALSE, warning = FALSE)

Load packages

# Packages
pacman::p_load(
  rio,
  here,
  reportfactory,
  rfextras,
  tidyverse,
  ggcorrplot,
  ggsci,
  ggpubr,
  ggbiplot,
  finalfit,
  gtsummary,
  flextable,
  ftExtra,
  broom,
  performance,
  lmtest,
  stats,
  moments,
  nortest,
  epiDisplay,
  car,
  Rtsne,
  factoextra,
  corrplot,
  grateful,
  patchwork,
  officer
)
# My function scripts
rfextras::load_scripts()
grateful::cite_packages(
  output = "file",
  out.format = "docx",
  out.dir = ".",
  citation.style = "vancouver")
##   |                                                                              |                                                                      |   0%  |                                                                              |......................................................................| 100%                                                                                          
## "C:/Program Files/RStudio/resources/app/bin/quarto/bin/tools/pandoc" +RTS -K512m -RTS grateful-report.knit.md --to docx --from markdown+autolink_bare_uris+tex_math_single_backslash --output pandoc5f047a9420d.docx --lua-filter "C:\Users\user\AppData\Local\R\win-library\4.3\rmarkdown\rmarkdown\lua\pagebreak.lua" --highlight-style tango --citeproc
## [1] "./grateful-report.docx"

Import data

clean_data <- import(here("data", "data.csv"))

Set themes

Define gtsummary theme

my_gtsummary_theme <-
  list(
    "pkgwide-fn:pvalue_fun" = function(x)
      style_pvalue(x, digits = 2),
    "pkgwide-fn:prependpvalue_fun" = function(x)
      style_pvalue(x, digits = 2, prepend_p = TRUE),
    "tbl_summary-str:continuous_stat" = "{median} ({p25}, {p75})",
    "tbl_summary-str:categorical_stat" = "{n} ({p}%)",
    "tbl_summary-fn:percent_fun" = function(x)
      style_number(x, digits = 1, scale = 100),
    "tbl_summary-arg:missing" = "no"
  )

# Set a gtsummary theme
set_gtsummary_theme(my_gtsummary_theme, theme_gtsummary_compact())

# Set a gtsummary language
theme_gtsummary_language(language = "en")

Define flextable theme

my_flextable_theme <- function(x, bold_header = FALSE) {
  std_border <- fp_border(width = 1, color = "grey14")
  
  x <- border_remove(x)
  x <- hline_top(x, border = std_border, part = "header")
  x <- hline_bottom(x, border = std_border, part = "header")
  x <- bold(x, bold = bold_header, part = "header")
  x <- hline_bottom(x, border = std_border, part = "body")
  x <- align(x, align = "left", part = "body")
  x <- align(x, align = "center", part = "header")
  x <- font(x, part = "all", fontname = "Segoe UI")
  fix_border_issues(x, part = "all")
  autofit(x)
}

Process data

Remove missing values (>10% by variable)

The following variables were removed: mcv, mch, hemoglobin, hematocrit, creatinine, urea, glucose, ph, anion_gap, sodio, potasio, cloro, calcio, fi_o2, pco2, hco3, lactate, antiparasitic.

Matching

The matchup will be with a 1:3 ratio match.

# Matching for Causal Inference
data <- MatchIt::matchit(
  I(t2dm == "yes") ~ edad,
  data = clean_data,
  exact = ~ sex,
  caliper = c(edad = 3),
  std.caliper = T,
  distance = "euclidean",
  ratio = 3
)

# Construct a matched dataset from a matchit object
data_match_eda <- MatchIt::match.data(data, subclass = "matched_id")

Recode and relevel (dictionary)

This subsection converts categorical variables into factors and recode/clean values using a data dictionary. The categorical variables will be saved as factor vectors, while some continuous/numeric variables will be categorized and converted into factors. Then, recode and change the factor levels using the forcats package. Also, the finalfit package will be utilized to label variables, while the ftExtra package will detect special characters and format the character columns as markdown text to be added to the flextable object. An alternate option is to utilize the matchmaker package for dictionary-based cleanup or labelled package to manipulate metadata. The disadvantage of categorizing continuous variables is that information is discarded. To address this, repeat the analysis on continuous variables to ensure there are no differences in the conclusion (sensitivity analysis).

📝NOTE: The markdown texts is by design a plain text

Exposures and outcomes

Potential risk factors associated with mortality caused by COVID-19 in patients with type II diabetes mellitus (T2DM) are presented, each factor/variable is labeled with a legend that indicates its clinical importance, accuracy, and number of events. This is see in the dictionary script.

data_match_eda <- dictionary(data_match_eda)

Exploratory Data Analysis (EDA)

In this subsection we will see a preview of the descriptive statistics, some summary measures from our sample. How you visualize the distribution of a variable will depend on whether the variable is categorical or continuous. To analyze the distribution of a continuous variable, begin with a histogram or density plot for a visual representation of the data distribution. For categorical variables, a bar chart or a pie/donut chart provides a clear comparison of different categories. In cases where a categorical variable has few levels, consider using tables or boxplots for a more concise display of the data.

📝NOTE: Descriptive statistics help you describe your current data, while inferential statistics allow you to make predictions and informed decisions based on your data.

Categorical variables

A categorical variable is a variable that can take on one of a small set of values. These values can be character, logical, or factor values. According to the object classes, character class objects are the most flexible, while logic class objects are the most restrictive. Many categorical variables lack an intrinsic order, and thus, it is necessary to reorder them to create a more informative display.

# Variables with relative frequency less than 2.5%
excluded_var <- categorical_pivot |>
  dplyr::filter(Porcentaje < 2.5) |>
  dplyr::distinct(Variable) |>
  dplyr::pull()
# Multiple tables by T2DM
lapply(categorical, function(x)
  table(x, categorical$t2dm))

Numerical variables

The distribution of continuous/numeric variables can be visualized using histograms, frequency polygons, Q-Q plots, box plots, violin plots, jitter plots, and Sina plots. A variable is considered continuous if it can assume any value from an infinite set of ordered values within an interval. A histogram shows the distribution of a continuous variable and differs from a bar chart by grouping data into continuous intervals. It also examines the distribution of a continuous variable broken down by a categorical variable (categorical outcome). A density plot displays the density instead of the count, which is the standardized count so that the area under each frequency polygon is 1 or 100%. In addition to the histogram and the frequency polygon, the probability distribution of a continuous variable can be obtained from the density function. This function is defined in terms of probability and would construct a frequency polygon if we had an infinite set of data, resulting in a continuous curve.

# Selection of numerical variables
numerical <- data_match_eda |>
  dplyr::select(where(is.numeric))

# Summary statistics
lapply(numerical, function(x)
  summary(x))
# Variable groups
dem_numerical <- numerical |>
  dplyr::select(edad:p_a_diastolica)

hemo_numerical <- numerical |>
  dplyr::select(wbc:platelets)

gas_numerical <- numerical |>
  dplyr::select(saturacion_de_oxigeno:pao2)

# Formal and visual exploration
total_plots(dem_numerical)

Numerical variables in survivors and non-survivor

In this part, density plots will be used to illustrate the data grouped by the outcome. Other plots will not be included in the analysis, as a table with more detailed information will be presented. The questions we will ask ourselves are: Does the data appear to be normally distributed? and are the variances between groups equal?

# Selection of numerical variables of patients with T2DM
surv_numerical_t2dm <- data_match_eda |>
  dplyr::select(where(is.numeric), outcome, t2dm) |>
  dplyr::filter(t2dm == "yes")

# # Selection of numerical variables of patients without T2DM
non_numerical_not2dm <- data_match_eda |>
  dplyr::select(where(is.numeric), outcome, t2dm) |>
  dplyr::filter(t2dm == "no")
# Summary statistics of survivors
lapply(surv_numerical_t2dm, function(x)
  summary(x))

# Summary statistics of non-survivors
lapply(non_numerical_not2dm, function(x)
  summary(x))
# Exploration of age by group
p1 <- group_stat_table_plot(surv_numerical_t2dm, "frecuencia_cardiaca", "outcome")

# Exploration of white-cells by group
p2 <- group_stat_table_plot(surv_numerical_t2dm, "pafi", "outcome")

# Exploration of neutrophils by group
p3 <- group_stat_table_plot(surv_numerical_t2dm, "pafi_cal", "outcome")

# Exploration of lymphocytes by group
p4 <- group_stat_table_plot(surv_numerical_t2dm, "pao2", "outcome")

p1 + p2 + p3 + p4 + plot_annotation(tag_levels = 'A')

Correlation matrix (multicollinearity)

The correlation between independent variables will be explored in order to identify multicollinearity. The numeric variables are different, with the exception of white-cells and the PaO2:FiO2 ratio. In the case of using white-cells and neutrophils or lymphocytes, it’s possible that the information provided by white-cells may be redundant and generate biased estimates. The same applies to the PaO2:FiO2 ratio and the FiO2. Given the clinical relevance of the PaO₂:FiO₂ ratio, it is preferable to work with this ratio rather than the PaO₂ alone.

📝NOTE: ggplot objects can’t process markdown text

# Selection of numerical variables
data_num = data_match_eda |>
  dplyr::select(where(is.numeric), -weights) |>
  na.omit()

# Rename variables
cor_data = rename(
  data_num,
  "Age" = edad,
  "Respiratory rate" = frecuencia_respiratoria,
  "Heart rate" = frecuencia_cardiaca,
  "SBP" = p_a_sistolica,
  "DBP" = p_a_diastolica,
  "White-cells" = wbc,
  "Neutrophils" = neutrofilos,
  "Lymphocytes" = linfocitos,
  "NLR" = nlr,
  "Platelets" = platelets,
  "SaO2" = saturacion_de_oxigeno,
  "FiO2" = fio2,
  "PaO2:Fio2 ratio" = pafi,
  "Calculated\nPaO2:Fio2 ratio" = pafi_cal,
  "PaO2" = pao2,
  "Follow-up time (days)" = len_hosp_stay
)

# Correlation matrix
corr <- round(cor(cor_data), 1)

# Color visualization
FS1 <- my_ggcorrplor(corr)

# View
FS1

# Gray visualization
FS1_grey <- my_ggcorrplor_grey(corr)

Produce outputs

Variable selection to analyze

The variables with not enough events (<2.5%) were excluded from the analysis. These were alcoholism, antibiotics, asma_bronquial, cancer, disf_multiorg, dislip, ecv, epc, erc, f_hepatica_aguda, f_multiorganica, hemodialisis, hiv, immunsupressive_dis, perdida_de_peso, polidipysia, polifagia, poliuria, sensorio, and smoker. The variables ac_renal_failure, sepsis and shock_septico were also excluded due to possible misclassification bias.

# Select included variables
data_match <- var_selec_analysis(data_match_eda)
# Check included variables
included_var <- colnames(data_match)

# Check excluded variables
dplyr::setdiff(excluded_var, included_var)
##  [1] "alcoholism"          "antibiotics"         "asma_bronquial"     
##  [4] "cancer"              "disf_multiorg"       "dislip"             
##  [7] "ecv"                 "epc"                 "erc"                
## [10] "f_hepatica_aguda"    "f_multiorganica"     "hemodialisis"       
## [13] "hiv"                 "immunsupressive_dis" "perdida_de_peso"    
## [16] "polidipysia"         "polifagia"           "poliuria"           
## [19] "sensorio"            "smoker"
#unique(excluded_var, included_var)

Table 1. Demographic and clinical characteristics of patients

# Demographic characteristics and clinical history
tbl_1.1 <- data_match |>
  tbl_summary(
    include = c(edad:len_hosp_stay),
    by = t2dm,
    percent = "column",
    statistic = list(edad ~ "{mean} ({sd})"),
    digits = list(all_continuous() ~ c(1, 1))
  ) |>
  add_overall() |>
  add_p(edad ~ "t.test",
        test.args = all_tests("t.test") ~ list(var.equal = TRUE)) |>
  bold_p(t = 0.05) |>
  modify_header(all_stat_cols() ~ "**{level}** (n = {n})",
                stat_0 = "**All patients** (n = {N})",
                p.value = "**p value**") |>
  modify_spanning_header(c(stat_1, stat_2) ~ "**Type 2 Diabetes**")

# Signs and symptoms
tbl_1.2 <- data_match |>
  tbl_summary(
    include = c(fever:dolor_abdominal, t2dm),
    by = t2dm,
    percent = "column",
    digits = list(all_continuous() ~ c(1, 1))
  ) |>
  add_overall() |>
  add_p() |>
  bold_p(t = 0.05) |>
  modify_header(all_stat_cols() ~ "**{level}** (n = {n})")

# Vital signs
tbl_1.3 <- data_match |>
  tbl_summary(
    include = c(frecuencia_respiratoria:p_a_diastolica, t2dm),
    by = t2dm,
    percent = "column",
    statistic = list(frecuencia_cardiaca ~ "{mean} ({sd})"),
    digits = list(all_continuous() ~ c(1, 1))
  ) |>
  add_overall() |>
  add_p() |>
  bold_p(t = 0.05) |>
  modify_header(all_stat_cols() ~ "**{level}** (n = {n})")
# Stack tables
table_1 = tbl_stack(
  list(tbl_1.1, tbl_1.2, tbl_1.3),
  group_header = c(
    "Demographic characteristics and clinical history",
    "Signs and symtoms",
    "Vital signs"
  ),
  quiet = TRUE
) |>
  modify_caption(
    "**Table 1**. Demographic and clinical characteristics of the patients"
    )

# Convert gtsummary object to a flextable object and format character columns
flex_table_1 <- table_1 |>
  gtsummary::as_flex_table() |>
  my_flextable_theme() |>
  flextable::fontsize(part = "all", size = 10) |>
  ftExtra::colformat_md()

# View
table_1
Table 1. Demographic and clinical characteristics of the patients
Characteristic All patients (n = 828)1 Type 2 Diabetes p value2
no (n = 621)1 yes (n = 207)1
Demographic characteristics and clinical history
Age (years) 59.6 (12.9) 59.6 (13.0) 59.6 (12.8) >0.99
Age (years)


0.78
    < 61 423 (51.1%) 319 (51.4%) 104 (50.2%)
    >= 61 405 (48.9%) 302 (48.6%) 103 (49.8%)
Sex


>0.99
    Female 288 (34.8%) 216 (34.8%) 72 (34.8%)
    Male 540 (65.2%) 405 (65.2%) 135 (65.2%)
Obesity 127 (15.3%) 89 (14.3%) 38 (18.4%) 0.16
Hypertension 203 (24.5%) 121 (19.5%) 82 (39.6%) <0.001
Follow-up time (days) 9.0 (5.0, 15.8) 9.0 (5.0, 16.0) 9.0 (4.0, 15.0) 0.33
Signs and symtoms
Fever 515 (62.4%) 400 (64.7%) 115 (55.6%) 0.018
Dry cought 654 (79.2%) 499 (80.6%) 155 (74.9%) 0.079
Sore throat 254 (30.8%) 187 (30.3%) 67 (32.4%) 0.57
General malaise 547 (66.2%) 400 (64.6%) 147 (71.0%) 0.092
Headache 173 (20.9%) 129 (20.8%) 44 (21.3%) 0.90
Tachypnea 267 (32.3%) 203 (32.8%) 64 (30.9%) 0.62
Dyspnea 710 (86.0%) 536 (86.6%) 174 (84.1%) 0.36
Anosmia 46 (5.6%) 32 (5.2%) 14 (6.8%) 0.39
Dysgeusia 29 (3.5%) 18 (2.9%) 11 (5.3%) 0.10
Lung crackles 395 (47.8%) 302 (48.8%) 93 (44.9%) 0.34
Diarrhea 49 (5.9%) 35 (5.7%) 14 (6.8%) 0.56
Vomiting 37 (4.5%) 26 (4.2%) 11 (5.3%) 0.50
Asthenia 111 (13.4%) 80 (12.9%) 31 (15.0%) 0.45
Abdominal pain 40 (4.8%) 28 (4.5%) 12 (5.8%) 0.46
Vital signs
Respiratory rate (BPM) 27.0 (23.0, 30.0) 28.0 (24.0, 30.0) 26.0 (23.0, 30.0) 0.045
Respiratory rate (BPM)


0.53
    24 - 30 402 (52.2%) 304 (52.8%) 98 (50.5%)
    < 24 195 (25.3%) 140 (24.3%) 55 (28.4%)
    > 30 173 (22.5%) 132 (22.9%) 41 (21.1%)
Heart rate (BPM) 97.7 (18.2) 96.7 (18.0) 100.8 (18.2) 0.010
Heart rate (BPM)


0.014
    < 100 425 (51.3%) 334 (53.8%) 91 (44.0%)
    >= 100 403 (48.7%) 287 (46.2%) 116 (56.0%)
SBP (mmHg) 110.0 (100.0, 120.0) 110.0 (100.0, 120.0) 120.0 (100.0, 130.0) 0.010
SBP (mmHg)


0.018
    < 140 737 (89.0%) 562 (90.5%) 175 (84.5%)
    >= 140 91 (11.0%) 59 (9.5%) 32 (15.5%)
DBP (mmHg) 70.0 (60.0, 73.5) 70.0 (60.0, 70.0) 70.0 (60.0, 80.0) 0.022
1 Mean (SD); n (%); Median (IQR)
2 Two Sample t-test; Pearson’s Chi-squared test; Wilcoxon rank sum test

Table 2. Laboratory findings and treatment of patients

# Laboratory findings
tbl_2.1 <- data_match |>
  tbl_summary(
    include = c(wbc:platelets.c, t2dm),
    by = t2dm,
    percent = "column",
    digits = list(all_continuous() ~ c(1, 1))
  ) |>
  add_overall() |>
  add_p() |>
  bold_p(t = 0.05) |>
  modify_header(all_stat_cols() ~ "**{level}** (n = {n})",
                stat_0 = "**All patients** (n = {N})",
                p.value = "**p value**") |>
  modify_spanning_header(all_stat_cols(stat_0 = FALSE) ~ "**Type 2 Diabetes**")

# Blood gas findings
tbl_2.2 <- data_match |>
  tbl_summary(
    include = c(saturacion_de_oxigeno:pao2.c, t2dm),
    by = t2dm,
    percent = "column",
    digits = list(all_continuous() ~ c(1, 1))
  ) |>
  add_overall() |>
  add_p() |>
  bold_p(t = 0.05) |>
  modify_header(all_stat_cols() ~ "**{level}** (n = {n})")

# Treatments
tbl_2.3 <- data_match |>
  tbl_summary(
    include = c(corticoides:pronacion, t2dm),
    by = t2dm,
    percent = "column",
    digits = list(all_continuous() ~ c(1, 1))
  ) |>
  add_overall() |>
  add_p() |>
  bold_p(t = 0.05) |> 
  modify_header(all_stat_cols() ~ "**{level}** (n = {n})")
# Stack tables
table_2 = tbl_stack(
  list(tbl_2.1, tbl_2.2, tbl_2.3),
  group_header = c("Laboratory findings", "Blood gas findings", "Treatment"),
  quiet = TRUE
) |>
  modify_caption("Table 2. Laboratory findings and treatment of patients")

# Convert gtsummary object to a flextable object and format character columns
flex_table_2 <- table_2 |>
  gtsummary::as_flex_table() |>
  my_flextable_theme() |>
  flextable::fontsize(part = "all", size = 10) |>
  ftExtra::colformat_md()

# View
flex_table_2
Table 2. Laboratory findings and treatment of patients

Type 2 Diabetes

Group

Characteristic

All patients (n = 828)1

no (n = 621)1

yes (n = 207)1

p value2

Laboratory findings

WBC (×10−9/L)

11.0 (7.9, 15.6)

11.1 (7.9, 15.4)

10.7 (8.1, 15.9)

0.71

WBC (×10−9/L)

0.71

4-10

294 (37.7%)

220 (37.7%)

74 (37.8%)

< 4

32 (4.1%)

22 (3.8%)

10 (5.1%)

10

454 (58.2%)

342 (58.6%)

112 (57.1%)

Neutrophils (×10−9/L)

9.2 (6.1, 13.8)

9.3 (6.0, 13.6)

9.1 (6.4, 14.3)

0.67

Neutrophils (×10−9/L)

0.37

<= 6.3

265 (32.0%)

204 (32.9%)

61 (29.5%)

6.3

563 (68.0%)

417 (67.1%)

146 (70.5%)

Lymphocytes (×10−9/L)

0.9 (0.6, 1.4)

0.9 (0.6, 1.4)

0.9 (0.6, 1.3)

0.24

Lymphocytes (×10−9/L)

0.69

= 1

402 (48.6%)

299 (48.1%)

103 (49.8%)

< 1

426 (51.4%)

322 (51.9%)

104 (50.2%)

NLR

10.3 (5.6, 17.4)

10.2 (5.7, 16.6)

10.3 (5.1, 20.0)

0.38

NLR

0.77

< 5.86

205 (26.7%)

152 (26.4%)

53 (27.5%)

= 5.86

564 (73.3%)

424 (73.6%)

140 (72.5%)

Platelets (×10−9/L)

211.0 (105.5, 270.0)

211.0 (96.8, 268.0)

211.5 (136.0, 276.0)

0.39

Platelets (×10−9/L)

0.21

= 125

604 (72.9%)

446 (71.8%)

158 (76.3%)

< 125

224 (27.1%)

175 (28.2%)

49 (23.7%)

Blood gas findings

SaO2 (%)

89.0 (82.0, 93.0)

89.0 (80.0, 93.0)

90.0 (84.0, 94.0)

0.030

SaO2

0.041

= 90

401 (48.4%)

288 (46.4%)

113 (54.6%)

< 90

427 (51.6%)

333 (53.6%)

94 (45.4%)

FiO2 (%)

21.0 (21.0, 40.0)

21.0 (21.0, 40.0)

21.0 (21.0, 28.0)

0.26

FiO2 (%)

0.37

21 (O2 therapy)

228 (27.5%)

176 (28.3%)

52 (25.1%)

21

600 (72.5%)

445 (71.7%)

155 (74.9%)

Calculated PaO2:FiO2 ratio

173.0 (89.8, 278.0)

162.0 (84.6, 264.5)

197.0 (108.5, 310.0)

0.001

PaO2:FiO2 ratio

0.14

200

391 (47.2%)

284 (45.7%)

107 (51.7%)

<= 200

437 (52.8%)

337 (54.3%)

100 (48.3%)

PaO2:FiO2 ratio

3.0 (1.9, 3.9)

2.9 (1.8, 3.8)

3.2 (2.3, 4.1)

0.018

PaO2:FiO2 ratio

0.041

200

106 (12.8%)

88 (14.2%)

18 (8.7%)

<= 200

722 (87.2%)

533 (85.8%)

189 (91.3%)

PaO2 (mmHg)

71.8 (58.3, 91.7)

71.2 (57.0, 89.9)

73.1 (61.2, 94.9)

0.061

PaO2

0.11

= 60

617 (74.5%)

454 (73.1%)

163 (78.7%)

< 60

211 (25.5%)

167 (26.9%)

44 (21.3%)

Treatment

Corticosteroids

717 (86.6%)

553 (89.0%)

164 (79.2%)

<0.001

Anticoagulants

733 (88.5%)

549 (88.4%)

184 (88.9%)

0.85

Antimalarials

437 (52.8%)

327 (52.7%)

110 (53.1%)

0.90

Pronation

352 (46.4%)

271 (47.8%)

81 (42.2%)

0.18

1Median (IQR); n (%)

2Wilcoxon rank sum test; Pearson's Chi-squared test

Table 3. Demographic and clinical characteristics of patients by T2DM

Patients with T2DM

# Demographic characteristics and clinical history
tbl_3.1 <-
  data_match |> 
  dplyr::filter(t2dm == "yes") |>
  tbl_summary(
    include = c(edad:obesity, hta, outcome),
    by = outcome,
    percent = "column",
    statistic = list(edad ~ "{mean} ({sd})"),
    digits = list(all_continuous() ~ c(1, 1))
  ) |>
  add_p() |>
  bold_p(t = 0.05) |>
  modify_header(all_stat_cols() ~ "**{level}** (n = {n})",
                p.value = "**p value**")

# Signs and symptoms
tbl_3.2 <-
  data_match |> 
  dplyr::filter(t2dm == "yes") |>
  tbl_summary(
    include = c(fever:dolor_abdominal, outcome),
    by = outcome,
    percent = "column",
    digits = list(all_continuous() ~ c(1, 1))
  ) |>
  add_p() |>
  bold_p(t = 0.05) |>
  modify_header(all_stat_cols() ~ "**{level}** (n = {n})")

# Vital signs
tbl_3.3 <-
  data_match |> 
  dplyr::filter(t2dm == "yes") |>
  tbl_summary(
    include = c(frecuencia_respiratoria:p_a_diastolica.c, outcome),
    by = outcome,
    percent = "column",
    digits = list(all_continuous() ~ c(1, 1))
  ) |>
  add_p() |>
  bold_p(t = 0.05) |>
  modify_header(all_stat_cols() ~ "**{level}** (n = {n})")

Patients without T2DM

# Demographic characteristics and clinical history
tbl_3.4 <-
  data_match |>
  dplyr::filter(t2dm == "no") |>
  tbl_summary(
    include = c(edad:obesity, hta, outcome),
    by = outcome,
    percent = "column",
    statistic = list(edad ~ "{mean} ({sd})"),
    digits = list(all_continuous() ~ c(1, 1))
  ) |>
  add_p() |>
  bold_p(t = 0.05) |>
  modify_header(all_stat_cols() ~ "**{level}** (n = {n})",
                p.value = "**p value**")

# Signs and symptoms
tbl_3.5 <-
  data_match |> 
  dplyr::filter(t2dm == "no") |>
  tbl_summary(
    include = c(fever:dolor_abdominal, outcome),
    by = outcome,
    percent = "column",
    digits = list(all_continuous() ~ c(1, 1))
  ) |>
  add_p() |>
  bold_p(t = 0.05) |>
  modify_header(all_stat_cols() ~ "**{level}** (n = {n})")

# Vital signs
tbl_3.6 <-
  data_match |> 
  dplyr::filter(t2dm == "no") |>
  tbl_summary(
    include = c(frecuencia_respiratoria:p_a_diastolica.c, outcome),
    by = outcome,
    percent = "column",
    digits = list(all_continuous() ~ c(1, 1))
  ) |>
  add_p() |>
  bold_p(t = 0.05) |>
  modify_header(all_stat_cols() ~ "**{level}** (n = {n})")
# Merge tables
tbl_3_p1 <- tbl_merge(
  tbls = list(tbl_3.1, tbl_3.4),
  tab_spanner = c("**Patients with T2DM**", "**Patients without T2DM**")
)

tbl_3_p2 <- tbl_merge(tbls = list(tbl_3.2, tbl_3.5)) |>
  modify_spanning_header(everything() ~ NA_character_)

tbl_3_p3 <- tbl_merge(tbls = list(tbl_3.3, tbl_3.6)) |>
  modify_spanning_header(everything() ~ NA_character_)

# Stack tables
table_3 = tbl_stack(
  list(tbl_3_p1, tbl_3_p2, tbl_3_p3),
  group_header = c(
    "Demographic characteristics and clinical history",
    "Signs and symtoms",
    "Vital signs"
  ),
  quiet = TRUE
) |> 
  modify_caption(
    "**Table 3**. Demographic and clinical characteristics of the patients by T2DM")

# Convert gtsummary object to a flextable object and format character columns
flex_table_3 <- table_3 |>
  gtsummary::as_flex_table() |>
  my_flextable_theme() |>
  flextable::fontsize(part = "all", size = 10) |>
  ftExtra::colformat_md()

# View
table_3
Table 3. Demographic and clinical characteristics of the patients by T2DM
Characteristic Patients with T2DM Patients without T2DM
Survivor (n = 114)1 Non-survivor (n = 93)1 p value2 Survivor (n = 323)1 Non-survivor (n = 298)1 p value2
Demographic characteristics and clinical history
Age (years) 56.5 (13.1) 63.5 (11.4) <0.001 55.5 (12.5) 64.2 (11.9) <0.001
Age (years)

<0.001

<0.001
    < 61 71 (62.3%) 33 (35.5%)
212 (65.6%) 107 (35.9%)
    >= 61 43 (37.7%) 60 (64.5%)
111 (34.4%) 191 (64.1%)
Sex

0.49

0.91
    Female 42 (36.8%) 30 (32.3%)
113 (35.0%) 103 (34.6%)
    Male 72 (63.2%) 63 (67.7%)
210 (65.0%) 195 (65.4%)
Obesity 23 (20.2%) 15 (16.1%) 0.45 42 (13.0%) 47 (15.8%) 0.33
Hypertension 36 (31.6%) 46 (49.5%) 0.009 54 (16.7%) 67 (22.5%) 0.070
Signs and symtoms
Fever 56 (49.1%) 59 (63.4%) 0.039 193 (59.8%) 207 (70.2%) 0.007
Dry cought 81 (71.1%) 74 (79.6%) 0.16 244 (75.5%) 255 (86.1%) <0.001
Sore throat 44 (38.6%) 23 (24.7%) 0.034 110 (34.1%) 77 (26.1%) 0.032
General malaise 87 (76.3%) 60 (64.5%) 0.063 200 (61.9%) 200 (67.6%) 0.14
Headache 36 (31.6%) 8 (8.6%) <0.001 79 (24.5%) 50 (16.9%) 0.021
Tachypnea 42 (36.8%) 22 (23.7%) 0.041 108 (33.4%) 95 (32.1%) 0.72
Dyspnea 87 (76.3%) 87 (93.5%) <0.001 262 (81.1%) 274 (92.6%) <0.001
Anosmia 10 (8.8%) 4 (4.3%) 0.20 22 (6.8%) 10 (3.4%) 0.054
Dysgeusia 7 (6.1%) 4 (4.3%) 0.76 11 (3.4%) 7 (2.4%) 0.44
Lung crackles 59 (51.8%) 34 (36.6%) 0.029 185 (57.3%) 117 (39.5%) <0.001
Diarrhea 6 (5.3%) 8 (8.6%) 0.34 19 (5.9%) 16 (5.4%) 0.80
Vomiting 4 (3.5%) 7 (7.5%) 0.23 16 (5.0%) 10 (3.4%) 0.33
Asthenia 22 (19.3%) 9 (9.7%) 0.054 53 (16.4%) 27 (9.1%) 0.007
Abdominal pain 8 (7.0%) 4 (4.3%) 0.41 21 (6.5%) 7 (2.4%) 0.013
Vital signs
Respiratory rate (BPM) 26.0 (22.0, 28.0) 26.0 (24.0, 30.0) 0.060 26.0 (22.0, 28.0) 28.5 (26.0, 32.0) <0.001
Respiratory rate (BPM)

0.15

<0.001
    24 - 30 51 (46.8%) 47 (55.3%)
152 (50.0%) 152 (55.9%)
    < 24 37 (33.9%) 18 (21.2%)
103 (33.9%) 37 (13.6%)
    > 30 21 (19.3%) 20 (23.5%)
49 (16.1%) 83 (30.5%)
Heart rate (BPM) 98.0 (83.0, 107.0) 108.0 (92.8, 115.3) <0.001 94.0 (82.0, 105.0) 100.0 (89.0, 110.0) <0.001
Heart rate (BPM)

0.002

<0.001
    < 100 61 (53.5%) 30 (32.3%)
197 (61.0%) 137 (46.0%)
    >= 100 53 (46.5%) 63 (67.7%)
126 (39.0%) 161 (54.0%)
SBP (mmHg) 113.0 (100.0, 123.8) 120.0 (100.0, 130.0) 0.22 110.0 (100.0, 120.0) 110.0 (100.0, 120.0) 0.94
SBP (mmHg)

0.010

0.003
    < 140 103 (90.4%) 72 (77.4%)
303 (93.8%) 259 (86.9%)
    >= 140 11 (9.6%) 21 (22.6%)
20 (6.2%) 39 (13.1%)
DBP (mmHg) 70.0 (60.0, 70.0) 70.0 (60.0, 80.0) 0.11 70.0 (60.0, 70.0) 70.0 (60.0, 70.0) 0.22
DBP (mmHg)

0.006

0.16
    < 90 110 (96.5%) 80 (86.0%)
307 (95.0%) 275 (92.3%)
    >= 90 4 (3.5%) 13 (14.0%)
16 (5.0%) 23 (7.7%)
1 Mean (SD); n (%)
2 Wilcoxon rank sum test; Pearson’s Chi-squared test

Table 4. Laboratory findings and treatment of patients by T2DM

Patients with T2DM

# Laboratory findings
tbl_4.1 <-
  data_match |> 
  dplyr::filter(t2dm == "yes") |>
  tbl_summary(
    include = c(wbc:platelets.c, outcome),
    by = outcome,
    percent = "column",
    digits = list(all_continuous() ~ c(1, 1))
  ) |>
  add_p() |>
  bold_p(t = 0.05) |>
  modify_header(all_stat_cols() ~ "**{level}** (n = {n})",
                p.value = "**p value**")

# Blood gas findings
tbl_4.2 <-
  data_match |> 
  dplyr::filter(t2dm == "yes") |>
  tbl_summary(
    include = c(saturacion_de_oxigeno:pao2.c, outcome),
    by = outcome,
    percent = "column",
    digits = list(all_continuous() ~ c(1, 1))
  ) |>
  add_p() |>
  bold_p(t = 0.05) |>
  modify_header(all_stat_cols() ~ "**{level}** (n = {n})")

# Treatment
tbl_4.3 <-
  data_match |> 
  dplyr::filter(t2dm == "yes") |>
  tbl_summary(
    include = c(corticoides:pronacion, outcome),
    by = outcome,
    percent = "column",
    digits = list(all_continuous() ~ c(1, 1))
  ) |>
  add_p() |>
  bold_p(t = 0.05) |>
  modify_header(all_stat_cols() ~ "**{level}** (n = {n})")

Patients without T2DM

# Laboratory findings
tbl_4.4 <-
  data_match |> 
  dplyr::filter(t2dm == "no") |>
  tbl_summary(
    include = c(wbc:platelets.c, outcome),
    by = outcome,
    percent = "column",
    digits = list(all_continuous() ~ c(1, 1))
  ) |>
  add_p() |>
  bold_p(t = 0.05) |>
  modify_header(all_stat_cols() ~ "**{level}** (n = {n})",
                p.value = "**p value**")

# Blood gas findings
tbl_4.5 <-
  data_match |> 
  dplyr::filter(t2dm == "no") |>
  tbl_summary(
    include = c(saturacion_de_oxigeno:pao2.c, outcome),
    by = outcome,
    percent = "column",
    digits = list(all_continuous() ~ c(1, 1))
  ) |>
  add_p() |>
  bold_p(t = 0.05) |>
  modify_header(all_stat_cols() ~ "**{level}** (n = {n})")

# Treatment
tbl_4.6 <-
  data_match |> 
  dplyr::filter(t2dm == "no") |>
  tbl_summary(
    include = c(corticoides:pronacion, outcome),
    by = outcome,
    percent = "column",
    digits = list(all_continuous() ~ c(1, 1))
  ) |>
  add_p() |>
  bold_p(t = 0.05) |>
  modify_header(all_stat_cols() ~ "**{level}** (n = {n})")
# Merge tables
tbl_4_p1 <- tbl_merge(
  tbls = list(tbl_4.1, tbl_4.4),
  tab_spanner = c("**Patients with T2DM**", "**Patients without T2DM**")
)

tbl_4_p2 <- tbl_merge(tbls = list(tbl_4.2, tbl_4.5)) |>
  modify_spanning_header(everything() ~ NA_character_)

tbl_4_p3 <- tbl_merge(tbls = list(tbl_4.3, tbl_4.6)) |>
  modify_spanning_header(everything() ~ NA_character_)

# Stack tables
table_4 = tbl_stack(
  list(tbl_4_p1, tbl_4_p2, tbl_4_p3),
  group_header = c("Laboratory findings", "Blood gas findings", "Treatment"),
  quiet = TRUE
) |>
  modify_caption("Table 4. Laboratory findings and treatment of patients")


# Convert gtsummary object to a flextable object and format character columns
flex_table_4 <- table_4 |>
  gtsummary::as_flex_table() |>
  my_flextable_theme() |>
  flextable::fontsize(part = "all", size = 10) |>
  ftExtra::colformat_md()

# View
flex_table_4
Table 4. Laboratory findings and treatment of patients

Patients with T2DM

Patients without T2DM

Group

Characteristic

Survivor (n = 114)1

Non-survivor (n = 93)1

p value2

Survivor (n = 323)1

Non-survivor (n = 298)1

p value3

Laboratory findings

WBC (×10−9/L)

9.6 (6.9, 14.2)

12.4 (9.6, 17.3)

<0.001

10.0 (7.4, 13.9)

12.6 (9.2, 16.8)

<0.001

WBC (×10−9/L)

0.003

<0.001

4-10

52 (47.7%)

22 (25.3%)

146 (47.6%)

74 (26.7%)

< 4

6 (5.5%)

4 (4.6%)

9 (2.9%)

13 (4.7%)

10

51 (46.8%)

61 (70.1%)

152 (49.5%)

190 (68.6%)

Neutrophils (×10−9/L)

8.0 (5.0, 12.5)

11.2 (8.4, 16.0)

<0.001

8.0 (5.5, 11.8)

11.0 (7.4, 15.0)

<0.001

Neutrophils (×10−9/L)

0.004

<0.001

<= 6.3

43 (37.7%)

18 (19.4%)

127 (39.3%)

77 (25.8%)

6.3

71 (62.3%)

75 (80.6%)

196 (60.7%)

221 (74.2%)

Lymphocytes (×10−9/L)

0.9 (0.7, 1.4)

0.9 (0.4, 1.2)

0.009

1.1 (0.7, 1.6)

0.8 (0.6, 1.1)

<0.001

Lymphocytes (×10−9/L)

0.72

<0.001

= 1

58 (50.9%)

45 (48.4%)

189 (58.5%)

110 (36.9%)

< 1

56 (49.1%)

48 (51.6%)

134 (41.5%)

188 (63.1%)

NLR

8.4 (4.2, 15.5)

13.6 (8.1, 26.8)

<0.001

7.7 (4.2, 14.3)

12.8 (8.1, 19.7)

<0.001

NLR

0.001

<0.001

< 5.86

40 (36.7%)

13 (15.5%)

115 (38.0%)

37 (13.6%)

= 5.86

69 (63.3%)

71 (84.5%)

188 (62.0%)

236 (86.4%)

Platelets (×10−9/L)

183.5 (137.0, 216.0)

264.5 (120.5, 297.8)

<0.001

180.0 (100.8, 217.0)

257.5 (62.1, 292.5)

<0.001

Platelets (×10−9/L)

0.75

0.11

= 125

88 (77.2%)

70 (75.3%)

223 (69.0%)

223 (74.8%)

< 125

26 (22.8%)

23 (24.7%)

100 (31.0%)

75 (25.2%)

Blood gas findings

SaO2 (%)

92.0 (89.0, 95.0)

85.5 (79.0, 91.0)

<0.001

91.0 (86.0, 95.0)

85.0 (74.0, 90.0)

<0.001

SaO2

<0.001

<0.001

= 90

82 (71.9%)

31 (33.3%)

198 (61.3%)

90 (30.2%)

< 90

32 (28.1%)

62 (66.7%)

125 (38.7%)

208 (69.8%)

FiO2 (%)

21.0 (21.0, 28.0)

21.0 (21.0, 31.0)

0.69

21.0 (21.0, 21.0)

21.0 (21.0, 80.0)

<0.001

FiO2 (%)

0.91

<0.001

21 (O2 therapy)

29 (25.4%)

23 (24.7%)

73 (22.6%)

103 (34.6%)

21

85 (74.6%)

70 (75.3%)

250 (77.4%)

195 (65.4%)

Calculated PaO2:FiO2 ratio

249.0 (159.0, 338.0)

139.0 (84.4, 249.3)

<0.001

224.0 (130.0, 322.0)

105.5 (69.4, 196.5)

<0.001

PaO2:FiO2 ratio

<0.001

<0.001

200

72 (63.2%)

35 (37.6%)

201 (62.2%)

83 (27.9%)

<= 200

42 (36.8%)

58 (62.4%)

122 (37.8%)

215 (72.1%)

PaO2:FiO2 ratio

3.5 (2.6, 4.3)

2.8 (1.7, 3.7)

0.009

3.3 (2.5, 4.1)

2.5 (1.0, 3.5)

<0.001

PaO2:FiO2 ratio

0.65

0.058

200

9 (7.9%)

9 (9.7%)

54 (16.7%)

34 (11.4%)

<= 200

105 (92.1%)

84 (90.3%)

269 (83.3%)

264 (88.6%)

PaO2 (mmHg)

78.2 (67.2, 100.5)

67.4 (54.2, 86.6)

<0.001

77.2 (64.0, 95.6)

63.2 (49.2, 82.4)

<0.001

PaO2

<0.001

<0.001

= 60

102 (89.5%)

61 (65.6%)

278 (86.1%)

176 (59.1%)

< 60

12 (10.5%)

32 (34.4%)

45 (13.9%)

122 (40.9%)

Treatment

Corticosteroids

86 (75.4%)

78 (83.9%)

0.14

282 (87.3%)

271 (90.9%)

0.15

Anticoagulants

103 (90.4%)

81 (87.1%)

0.46

279 (86.4%)

270 (90.6%)

0.10

Antimalarials

61 (53.5%)

49 (52.7%)

0.91

177 (54.8%)

150 (50.3%)

0.27

Pronation

41 (41.4%)

40 (43.0%)

0.82

130 (47.8%)

141 (47.8%)

0.99

1Median (IQR); n (%)

2Wilcoxon rank sum test; Fisher's exact test; Pearson's Chi-squared test

3Wilcoxon rank sum test; Pearson's Chi-squared test

Table 5. Logistic regression

In the univariate analysis, variables that were self-reported or did not have enough events were eliminated; this will help to avoid overfitting in the subsequent multivariable analysis. Other variables such as sex, smoker, alcoholism, dislip, obesity, malestar_general, anosmia, disgeusia, emesis, diarrea, poliuria, polidipysia, sensorio, p_a_sistolica, p_a_diastolica, platelets.c, pafi_cal.c, antibiotics, corticoides, anticoagulantes, antipaludicos, pronacion, showed no differences between groups in the bivariate analysis (Tables 4 and 5).

Not analyzed variables
Self-reported Not enough event (<10)
dolor_de_garganta, malestar_general, headache, anosmia, disgeusia, diarrea, emesis, astenia, dolor_abdominal, perdida_de_peso, sensorio, poliuria, polidipysia, polifagia. smoker, alcoholism, dislip, ecv, cancer, hiv, immunsupressive_dis, erc, hemodialisis, asma_bronquial, epc, shock_septico, disf_multiorg, f_hepatica_aguda, f_multiorganica, perdida_de_peso, poliuria, polidipysia, polifagia, sensorio, antibiotics.
# Patients with T2DM
data_uv_dm <- var_selec_uv(data_match, "yes")

# Patients without T2DM
data_uv_nondm <- var_selec_uv(data_match, "no")

Univariate models

📝NOTE: The variables included in the univariate regression analysis were selected based on the results of the bivariate analysis.

Patients with T2DM

table_s1.1_uv <- data_uv_dm |>
  tbl_uvregression(
    include = c(edad.c:pronacion),
    y = outcome,
    method = glm,
    method.args = list(family = binomial),
    exponentiate = TRUE,
    conf.int = TRUE,
    hide_n = TRUE,
    add_estimate_to_reference_rows = FALSE,
    pvalue_fun = ~ style_pvalue(.x, digits = 3),
    estimate_fun = ~ style_number(.x, digits = 2),
    label = list(
      edad.c ~ "Age (years)",
      sex ~ "Sex",
      hta ~ "Hypertension",
      obesity ~ "Obesity",
      fever ~ "Fever",
      tos ~ "Dry cought",
      taquipnea ~ "Tachypnea",
      disnea ~ "Dyspnea",
      frecuencia_cardiaca.c ~ "Heart rate (BPM)",
      frecuencia_respiratoria.c ~ "Respiratory rate (BPM)",
      p_a_sistolica.c ~ "SBP (mmHg)",
      p_a_diastolica.c ~ "DBP (mmHg)",
      wbc.c ~ "White-cells (×10^−9^/L)",
      neutrofilos.c ~ "Neutrophils (×10^−9^/L)",
      linfocitos.c ~ "Lymphocytes (×10^−9^/L)",
      nlr.c ~ "NLR",
      platelets.c ~ "Platelets (×10^−9^/L)",
      saturacion_de_oxigeno.c ~ "SaO~2~",
      fio2.c ~ "FiO~2~ (%)",
      pafi.c ~ "PaO~2~:FiO~2~ ratio",
      pao2.c ~ "PaO~2~",
      corticoides ~ "Corticosteroids",
      anticoagulantes ~ "Anticoagulants",
      antipaludicos ~ "Antimalarials",
      pronacion ~ "Pronation"
    )
  ) |>
  bold_labels() |>
  bold_p(t = 0.05) |>
  modify_header(estimate = "**Univariate OR**", p.value = "**p value**")

Patients without T2DM

table_s1.2_uv <- data_uv_nondm |>
  tbl_uvregression(
    include = c(edad.c:pronacion),
    y = outcome,
    method = glm,
    method.args = list(family = binomial),
    exponentiate = TRUE,
    conf.int = TRUE,
    hide_n = TRUE,
    add_estimate_to_reference_rows = FALSE,
    pvalue_fun = ~ style_pvalue(.x, digits = 3),
    estimate_fun = ~ style_number(.x, digits = 2),
    label = list(
      edad.c ~ "Age (years)",
      sex ~ "Sex",
      hta ~ "Hypertension",
      obesity ~ "Obesity",
      fever ~ "Fever",
      tos ~ "Dry cought",
      taquipnea ~ "Tachypnea",
      disnea ~ "Dyspnea",
      frecuencia_cardiaca.c ~ "Heart rate (BPM)",
      frecuencia_respiratoria.c ~ "Respiratory rate (BPM)",
      p_a_sistolica.c ~ "SBP (mmHg)",
      p_a_diastolica.c ~ "DBP (mmHg)",
      wbc.c ~ "White-cells (×10^−9^/L)",
      neutrofilos.c ~ "Neutrophils (×10^−9^/L)",
      linfocitos.c ~ "Lymphocytes (×10^−9^/L)",
      nlr.c ~ "NLR",
      platelets.c ~ "Platelets (×10^−9^/L)",
      saturacion_de_oxigeno.c ~ "SaO~2~",
      fio2.c ~ "FiO~2~ (%)",
      pafi.c ~ "PaO~2~:FiO~2~ ratio",
      pao2.c ~ "PaO~2~",
      corticoides ~ "Corticosteroids",
      anticoagulantes ~ "Anticoagulants",
      antipaludicos ~ "Antimalarials",
      pronacion ~ "Pronation"
    )
  ) |>
  bold_labels() |>
  bold_p(t = 0.05) |>
  modify_header(estimate = "**Univariate OR**", p.value = "**p value**")
# Stack tables
table_uv = tbl_merge(
  list(table_s1.1_uv, table_s1.2_uv),
  tab_spanner = c("**Patients with T2DM**", "**Patients without T2DM**"
  )
) |>
  modify_caption("Table S1. Univariate logistic regression by T2DM")

# Convert gtsummary object to a flextable object and format character columns
flex_table_s1 <- table_uv |>
  gtsummary::as_flex_table() |>
  my_flextable_theme() |>
  flextable::fontsize(part = "all", size = 10) |>
  ftExtra::colformat_md()

# View
flex_table_s1
Table S1. Univariate logistic regression by T2DM

Patients with T2DM

Patients without T2DM

Characteristic

Univariate OR1

95% CI1

p value

Univariate OR1

95% CI1

p value

Age (years)

< 61

= 61

2.46

1.33, 4.62

0.005

3.36

2.32, 4.88

<0.001

Sex

Female

Male

1.28

0.68, 2.43

0.451

1.00

0.69, 1.44

0.982

Hypertension

No

Yes

1.91

1.02, 3.60

0.044

1.35

0.87, 2.11

0.182

Obesity

No

Yes

0.60

0.26, 1.33

0.215

1.31

0.79, 2.17

0.296

Fever

No

Yes

1.54

0.83, 2.87

0.174

1.85

1.27, 2.70

0.001

Dry cought

No

Yes

1.50

0.74, 3.11

0.264

2.20

1.37, 3.58

0.001

Tachypnea

No

Yes

0.55

0.27, 1.07

0.080

0.88

0.60, 1.29

0.521

Dyspnea

No

Yes

3.75

1.42, 11.79

0.013

2.84

1.60, 5.28

<0.001

Heart rate (BPM)

< 100

= 100

2.31

1.23, 4.39

0.010

1.91

1.33, 2.74

<0.001

Respiratory rate (BPM)

24 - 30

< 24

0.59

0.28, 1.21

0.154

0.38

0.24, 0.61

<0.001

30

1.20

0.55, 2.64

0.648

1.87

1.18, 3.02

0.009

SBP (mmHg)

< 140

= 140

2.98

1.24, 7.71

0.018

1.66

0.88, 3.24

0.126

DBP (mmHg)

< 90

= 90

4.94

1.48, 22.50

0.017

1.14

0.53, 2.48

0.739

White-cells (×10−9/L)

4-10

< 4

1.56

0.36, 6.13

0.530

2.75

1.10, 7.24

0.033

10

2.92

1.50, 5.85

0.002

2.42

1.66, 3.55

<0.001

Neutrophils (×10−9/L)

<= 6.3

6.3

4.03

1.84, 9.60

<0.001

2.20

1.47, 3.31

<0.001

Lymphocytes (×10−9/L)

= 1

< 1

1.17

0.64, 2.16

0.613

2.60

1.80, 3.78

<0.001

NLR

< 5.86

= 5.86

3.02

1.49, 6.45

0.003

3.73

2.43, 5.84

<0.001

Platelets (×10−9/L)

= 125

< 125

1.13

0.57, 2.24

0.718

0.70

0.48, 1.03

0.071

SaO2

= 90

< 90

4.72

2.49, 9.19

<0.001

3.53

2.43, 5.16

<0.001

FiO2 (%)

21 (O2 therapy)

21

1.02

0.51, 2.07

0.961

0.64

0.43, 0.94

0.025

PaO2:FiO2 ratio

200

<= 200

2.46

1.33, 4.62

0.005

3.74

2.57, 5.49

<0.001

PaO2

= 60

< 60

4.95

2.29, 11.52

<0.001

3.77

2.45, 5.91

<0.001

Corticosteroids

No

Yes

2.13

1.01, 4.72

0.052

1.79

1.01, 3.27

0.051

Anticoagulants

No

Yes

0.92

0.35, 2.44

0.867

1.76

1.00, 3.17

0.054

Antimalarials

No

Yes

1.23

0.67, 2.27

0.500

0.95

0.66, 1.35

0.760

Pronation

Yes

No

0.92

0.50, 1.70

0.797

0.90

0.63, 1.28

0.556

1OR = Odds Ratio, CI = Confidence Interval

Multivariable models

Complete multivariable for patients with T2DM

# T2DM model
full_multivariable_dm <- glm(
  outcome ~
    edad.c + sex + hta + obesity + fever + tos + taquipnea + disnea +
    frecuencia_cardiaca.c + frecuencia_respiratoria.c + p_a_sistolica.c +
    p_a_diastolica.c + wbc.c + neutrofilos.c + linfocitos.c + nlr.c +
    platelets.c + saturacion_de_oxigeno.c + fio2.c + pafi.c + pao2.c +
    corticoides + anticoagulantes + antipaludicos + pronacion,
  data = data_uv_dm,
  family = binomial(link = "logit")
) |>
  tbl_regression(
    exponentiate = TRUE,
    conf.int = TRUE,
    pvalue_fun = ~ style_pvalue(.x, digits = 3),
    estimate_fun = ~ style_number(.x, digits = 2),
    label = list(
      edad.c ~ "Age (years)",
      sex ~ "Sex",
      hta ~ "Hypertension",
      obesity ~ "Obesity",
      fever ~ "Fever",
      tos ~ "Dry cought",
      taquipnea ~ "Tachypnea",
      disnea ~ "Dyspnea",
      frecuencia_cardiaca.c ~ "Heart rate (BPM)",
      frecuencia_respiratoria.c ~ "Respiratory rate (BPM)",
      p_a_sistolica.c ~ "SBP (mmHg)",
      p_a_diastolica.c ~ "DBP (mmHg)",
      wbc.c ~ "White-cells (×10^−9^/L)",
      neutrofilos.c ~ "Neutrophils (×10^−9^/L)",
      linfocitos.c ~ "Lymphocytes (×10^−9^/L)",
      nlr.c ~ "NLR",
      platelets.c ~ "Platelets (×10^−9^/L)",
      saturacion_de_oxigeno.c ~ "SaO~2~",
      fio2.c ~ "FiO~2~ (%)",
      pafi.c ~ "PaO~2~:FiO~2~ ratio",
      pao2.c ~ "PaO~2~",
      corticoides ~ "Corticosteroids",
      anticoagulantes ~ "Anticoagulants",
      antipaludicos ~ "Antimalarials",
      pronacion ~ "Pronation"
    )
  ) |>
  bold_p(t = 0.05) |>
  add_vif() |>
  modify_header(estimate = "**Multivariable OR**", 
                p.value = "**p value**", 
                aGVIF ~ "**aGVIF**")
# Model
m1_dm = glm(
  outcome ~
    edad.c + sex + hta + obesity + fever + tos + taquipnea + disnea +
    frecuencia_cardiaca.c + frecuencia_respiratoria.c + p_a_sistolica.c +
    p_a_diastolica.c + wbc.c + neutrofilos.c + linfocitos.c + nlr.c +
    platelets.c + saturacion_de_oxigeno.c + fio2.c + pafi.c + pao2.c +
    corticoides + anticoagulantes + antipaludicos + pronacion,
  data = data_uv_dm,
  family = binomial(link = "logit")
)

# Visual check of model assumptions
performance::check_model(m1_dm)

# Model performance
broom::glance(m1_dm)
## # A tibble: 1 × 8
##   null.deviance df.null logLik   AIC   BIC deviance df.residual  nobs
##           <dbl>   <int>  <dbl> <dbl> <dbl>    <dbl>       <int> <int>
## 1          233.     168  -80.2  216.  304.     160.         141   169
# Indices of model performance
performance::model_performance(m1_dm)
## # Indices of model performance
## 
## AIC     |    AICc |     BIC | Tjur's R2 |  RMSE | Sigma | Log_loss | Score_log | Score_spherical |   PCP
## --------------------------------------------------------------------------------------------------------
## 216.306 | 227.906 | 303.943 |     0.366 | 0.399 | 1.000 |    0.474 |   -69.168 |           0.011 | 0.686
# Check for multicollinearity
performance::check_collinearity(m1_dm)
## # Check for Multicollinearity
## 
## Low Correlation
## 
##                       Term  VIF   VIF 95% CI Increased SE Tolerance
##                     edad.c 1.30 [1.15, 1.58]         1.14      0.77
##                        sex 1.41 [1.24, 1.71]         1.19      0.71
##                        hta 1.37 [1.21, 1.66]         1.17      0.73
##                    obesity 1.26 [1.12, 1.53]         1.12      0.80
##                      fever 1.38 [1.21, 1.67]         1.17      0.73
##                        tos 1.67 [1.43, 2.03]         1.29      0.60
##                  taquipnea 1.20 [1.08, 1.47]         1.09      0.84
##                     disnea 1.40 [1.23, 1.70]         1.18      0.71
##      frecuencia_cardiaca.c 1.33 [1.18, 1.62]         1.15      0.75
##  frecuencia_respiratoria.c 1.76 [1.51, 2.15]         1.33      0.57
##            p_a_sistolica.c 1.50 [1.30, 1.82]         1.22      0.67
##           p_a_diastolica.c 1.42 [1.24, 1.72]         1.19      0.70
##                      wbc.c 3.00 [2.47, 3.72]         1.73      0.33
##              neutrofilos.c 2.52 [2.10, 3.12]         1.59      0.40
##               linfocitos.c 1.75 [1.50, 2.13]         1.32      0.57
##                      nlr.c 2.69 [2.22, 3.32]         1.64      0.37
##                platelets.c 1.38 [1.21, 1.68]         1.18      0.72
##    saturacion_de_oxigeno.c 2.14 [1.80, 2.63]         1.46      0.47
##                     fio2.c 1.45 [1.27, 1.76]         1.20      0.69
##                     pafi.c 1.72 [1.47, 2.09]         1.31      0.58
##                     pao2.c 1.20 [1.08, 1.47]         1.09      0.83
##                corticoides 1.52 [1.32, 1.85]         1.23      0.66
##            anticoagulantes 1.43 [1.25, 1.74]         1.20      0.70
##              antipaludicos 1.49 [1.30, 1.81]         1.22      0.67
##                  pronacion 1.33 [1.18, 1.62]         1.16      0.75
##  Tolerance 95% CI
##      [0.63, 0.87]
##      [0.58, 0.81]
##      [0.60, 0.83]
##      [0.65, 0.89]
##      [0.60, 0.83]
##      [0.49, 0.70]
##      [0.68, 0.92]
##      [0.59, 0.81]
##      [0.62, 0.85]
##      [0.46, 0.66]
##      [0.55, 0.77]
##      [0.58, 0.80]
##      [0.27, 0.41]
##      [0.32, 0.48]
##      [0.47, 0.67]
##      [0.30, 0.45]
##      [0.60, 0.82]
##      [0.38, 0.56]
##      [0.57, 0.79]
##      [0.48, 0.68]
##      [0.68, 0.92]
##      [0.54, 0.76]
##      [0.58, 0.80]
##      [0.55, 0.77]
##      [0.62, 0.85]

Complete multivariable for patients without T2DM

# Non T2DM model
full_multivariable_nondm <- glm(
  outcome ~
    edad.c + sex + hta + obesity + fever + tos + taquipnea + disnea +
    frecuencia_cardiaca.c + frecuencia_respiratoria.c + p_a_sistolica.c +
    p_a_diastolica.c + wbc.c + neutrofilos.c + linfocitos.c + nlr.c +
    platelets.c + saturacion_de_oxigeno.c + fio2.c + pafi.c + pao2.c +
    corticoides + anticoagulantes + antipaludicos + pronacion,
  data = data_uv_nondm,
  family = binomial(link = "logit")
) |>
  tbl_regression(
    exponentiate = TRUE,
    conf.int = TRUE,
    pvalue_fun = ~ style_pvalue(.x, digits = 3),
    estimate_fun = ~ style_number(.x, digits = 2),
    label = list(
      edad.c ~ "Age (years)",
      sex ~ "Sex",
      hta ~ "Hypertension",
      obesity ~ "Obesity",
      fever ~ "Fever",
      tos ~ "Dry cought",
      taquipnea ~ "Tachypnea",
      disnea ~ "Dyspnea",
      frecuencia_cardiaca.c ~ "Heart rate (BPM)",
      frecuencia_respiratoria.c ~ "Respiratory rate (BPM)",
      p_a_sistolica.c ~ "SBP (mmHg)",
      p_a_diastolica.c ~ "DBP (mmHg)",
      wbc.c ~ "White-cells (×10^−9^/L)",
      neutrofilos.c ~ "Neutrophils (×10^−9^/L)",
      linfocitos.c ~ "Lymphocytes (×10^−9^/L)",
      nlr.c ~ "NLR",
      platelets.c ~ "Platelets (×10^−9^/L)",
      saturacion_de_oxigeno.c ~ "SaO~2~",
      fio2.c ~ "FiO~2~ (%)",
      pafi.c ~ "PaO~2~:FiO~2~ ratio",
      pao2.c ~ "PaO~2~",
      corticoides ~ "Corticosteroids",
      anticoagulantes ~ "Anticoagulants",
      antipaludicos ~ "Antimalarials",
      pronacion ~ "Pronation"
    )
  ) |>
  bold_p(t = 0.05) |>
  add_vif() |>
  modify_header(estimate = "**Multivariable OR**",
                p.value = "**p value**",
                aGVIF ~ "**aGVIF**")
m1_non = glm(
  outcome ~
    edad.c + sex + hta + obesity + fever + tos + taquipnea + disnea +
    frecuencia_cardiaca.c + frecuencia_respiratoria.c + p_a_sistolica.c +
    p_a_diastolica.c + wbc.c + neutrofilos.c + linfocitos.c + nlr.c +
    platelets.c + saturacion_de_oxigeno.c + fio2.c + pafi.c + pao2.c +
    corticoides + anticoagulantes + antipaludicos + pronacion,
  data = data_uv_nondm,
  family = binomial(link = "logit")
)

# Visual check of model assumptions
performance::check_model(m1_non)

# Model performance
broom::glance(m1_non)
## # A tibble: 1 × 8
##   null.deviance df.null logLik   AIC   BIC deviance df.residual  nobs
##           <dbl>   <int>  <dbl> <dbl> <dbl>    <dbl>       <int> <int>
## 1          675.     486  -244.  543.  660.     487.         459   487
# Indices of model performance
performance::model_performance(m1_non)
## # Indices of model performance
## 
## AIC     |    AICc |     BIC | Tjur's R2 |  RMSE | Sigma | Log_loss | Score_log | Score_spherical |   PCP
## --------------------------------------------------------------------------------------------------------
## 543.009 | 546.555 | 660.281 |     0.341 | 0.405 | 1.000 |    0.500 |      -Inf |           0.003 | 0.670
# Check for multicollinearity
performance::check_collinearity(m1_non)
## # Check for Multicollinearity
## 
## Low Correlation
## 
##                       Term  VIF   VIF 95% CI Increased SE Tolerance
##                     edad.c 1.13 [1.06, 1.29]         1.06      0.89
##                        sex 1.09 [1.03, 1.27]         1.05      0.92
##                        hta 1.22 [1.13, 1.38]         1.10      0.82
##                    obesity 1.14 [1.07, 1.30]         1.07      0.88
##                      fever 1.20 [1.11, 1.36]         1.10      0.83
##                        tos 1.46 [1.33, 1.66]         1.21      0.68
##                  taquipnea 1.22 [1.13, 1.38]         1.11      0.82
##                     disnea 1.64 [1.47, 1.86]         1.28      0.61
##      frecuencia_cardiaca.c 1.15 [1.08, 1.31]         1.07      0.87
##  frecuencia_respiratoria.c 1.47 [1.33, 1.66]         1.21      0.68
##            p_a_sistolica.c 1.39 [1.27, 1.57]         1.18      0.72
##           p_a_diastolica.c 1.36 [1.24, 1.54]         1.17      0.74
##                      wbc.c 2.56 [2.25, 2.95]         1.60      0.39
##              neutrofilos.c 2.47 [2.18, 2.84]         1.57      0.40
##               linfocitos.c 1.49 [1.35, 1.69]         1.22      0.67
##                      nlr.c 2.00 [1.78, 2.28]         1.41      0.50
##                platelets.c 1.35 [1.24, 1.53]         1.16      0.74
##    saturacion_de_oxigeno.c 1.56 [1.41, 1.77]         1.25      0.64
##                     fio2.c 1.26 [1.16, 1.42]         1.12      0.79
##                     pafi.c 1.27 [1.17, 1.44]         1.13      0.78
##                     pao2.c 1.20 [1.11, 1.36]         1.09      0.84
##                corticoides 1.60 [1.44, 1.82]         1.27      0.62
##            anticoagulantes 1.79 [1.60, 2.04]         1.34      0.56
##              antipaludicos 1.49 [1.35, 1.69]         1.22      0.67
##                  pronacion 1.18 [1.10, 1.34]         1.09      0.84
##  Tolerance 95% CI
##      [0.77, 0.95]
##      [0.79, 0.97]
##      [0.73, 0.89]
##      [0.77, 0.94]
##      [0.74, 0.90]
##      [0.60, 0.75]
##      [0.72, 0.88]
##      [0.54, 0.68]
##      [0.76, 0.93]
##      [0.60, 0.75]
##      [0.64, 0.79]
##      [0.65, 0.81]
##      [0.34, 0.44]
##      [0.35, 0.46]
##      [0.59, 0.74]
##      [0.44, 0.56]
##      [0.65, 0.81]
##      [0.57, 0.71]
##      [0.70, 0.86]
##      [0.70, 0.85]
##      [0.74, 0.90]
##      [0.55, 0.69]
##      [0.49, 0.62]
##      [0.59, 0.74]
##      [0.75, 0.91]
# Stack tables
table_mv = tbl_merge(
  tbls = list(
    table_s1.1_uv,
    full_multivariable_dm,
    table_s1.2_uv,
    full_multivariable_nondm
  )
) |>
  modify_spanning_header(
    c(estimate_1:aGVIF_2) ~ "**Patients with T2DM**",
    c(estimate_3:aGVIF_4) ~ "**Patients without T2DM**"
  ) |> 
  modify_footnote(everything() ~ NA, abbreviation = TRUE) |>
  modify_table_body( ~ .x |> arrange(row_type == "glance_statistic")) |>
  modify_footnote(
    c(estimate_1, estimate_2, estimate_3, estimate_4) ~ "OR = Odds Ratio",
    c(ci_1, ci_2, ci_3, ci_4) ~ "95% CI = 95% Confidence Interval",
    c(GVIF_2, GVIF_4) ~ "GVIF = Generalized Variance Inflation Factor",
    c(aGVIF_2, aGVIF_4) ~ "aGVIF = Adjusted GVIF or GVIF^[1/(2*df)]"
  )

# Convert gtsummary object to a flextable object and format character columns
flex_table_mv <- table_mv |>
  gtsummary::as_flex_table() |>
  my_flextable_theme() |>
  flextable::fontsize(part = "all", size = 10) |>
  ftExtra::colformat_md()

# View
flex_table_mv

Patients with T2DM

Patients without T2DM

Characteristic

Univariate OR1

95% CI2

p value

Multivariable OR1

95% CI2

p value

GVIF3

aGVIF4

Univariate OR1

95% CI2

p value

Multivariable OR1

95% CI2

p value

GVIF3

aGVIF4

Age (years)

1.3

1.1

1.1

1.1

< 61

= 61

2.46

1.33, 4.62

0.005

2.13

0.90, 5.19

0.089

3.36

2.32, 4.88

<0.001

3.22

2.03, 5.16

<0.001

Sex

1.4

1.2

1.1

1.0

Female

Male

1.28

0.68, 2.43

0.451

1.37

0.54, 3.58

0.509

1.00

0.69, 1.44

0.982

0.86

0.53, 1.39

0.534

Hypertension

1.4

1.2

1.2

1.1

No

Yes

1.91

1.02, 3.60

0.044

1.99

0.80, 5.10

0.144

1.35

0.87, 2.11

0.182

0.92

0.51, 1.66

0.768

Obesity

1.3

1.1

1.1

1.1

No

Yes

0.60

0.26, 1.33

0.215

0.78

0.24, 2.46

0.668

1.31

0.79, 2.17

0.296

1.26

0.67, 2.41

0.473

Fever

1.4

1.2

1.2

1.1

No

Yes

1.54

0.83, 2.87

0.174

1.50

0.61, 3.79

0.381

1.85

1.27, 2.70

0.001

1.76

1.06, 2.92

0.029

Dry cought

1.7

1.3

1.5

1.2

No

Yes

1.50

0.74, 3.11

0.264

0.52

0.16, 1.64

0.268

2.20

1.37, 3.58

0.001

1.66

0.82, 3.40

0.159

Tachypnea

1.2

1.1

1.2

1.1

No

Yes

0.55

0.27, 1.07

0.080

0.50

0.20, 1.24

0.141

0.88

0.60, 1.29

0.521

0.80

0.48, 1.34

0.395

Dyspnea

1.4

1.2

1.6

1.3

No

Yes

3.75

1.42, 11.79

0.013

1.76

0.44, 8.27

0.447

2.84

1.60, 5.28

<0.001

1.37

0.55, 3.47

0.498

Heart rate (BPM)

1.3

1.2

1.2

1.1

< 100

= 100

2.31

1.23, 4.39

0.010

2.45

1.02, 6.15

0.049

1.91

1.33, 2.74

<0.001

1.70

1.07, 2.74

0.026

Respiratory rate (BPM)

1.8

1.2

1.5

1.1

24 - 30

< 24

0.59

0.28, 1.21

0.154

1.25

0.43, 3.72

0.682

0.38

0.24, 0.61

<0.001

0.62

0.34, 1.14

0.123

30

1.20

0.55, 2.64

0.648

0.68

0.23, 1.91

0.465

1.87

1.18, 3.02

0.009

1.30

0.72, 2.36

0.387

SBP (mmHg)

1.5

1.2

1.4

1.2

< 140

= 140

2.98

1.24, 7.71

0.018

1.22

0.27, 5.43

0.793

1.66

0.88, 3.24

0.126

2.59

1.03, 6.73

0.045

DBP (mmHg)

1.4

1.2

1.4

1.2

< 90

= 90

4.94

1.48, 22.50

0.017

2.65

0.36, 23.66

0.350

1.14

0.53, 2.48

0.739

0.48

0.16, 1.45

0.189

White-cells (×10−9/L)

3.0

1.3

2.6

1.3

4-10

< 4

1.56

0.36, 6.13

0.530

4.35

0.46, 41.69

0.195

2.75

1.10, 7.24

0.033

2.70

0.83, 9.09

0.100

10

2.92

1.50, 5.85

0.002

1.40

0.49, 4.05

0.530

2.42

1.66, 3.55

<0.001

1.11

0.57, 2.13

0.760

Neutrophils (×10−9/L)

2.5

1.6

2.5

1.6

<= 6.3

6.3

4.03

1.84, 9.60

<0.001

3.47

0.76, 18.35

0.121

2.20

1.47, 3.31

<0.001

1.04

0.47, 2.25

0.928

Lymphocytes (×10−9/L)

1.7

1.3

1.5

1.2

= 1

< 1

1.17

0.64, 2.16

0.613

0.60

0.21, 1.64

0.325

2.60

1.80, 3.78

<0.001

1.50

0.87, 2.57

0.142

NLR

2.7

1.6

2.0

1.4

< 5.86

= 5.86

3.02

1.49, 6.45

0.003

2.15

0.53, 9.13

0.289

3.73

2.43, 5.84

<0.001

2.81

1.36, 5.90

0.006

Platelets (×10−9/L)

1.4

1.2

1.4

1.2

= 125

< 125

1.13

0.57, 2.24

0.718

0.76

0.27, 2.06

0.587

0.70

0.48, 1.03

0.071

0.43

0.24, 0.74

0.003

SaO2

2.1

1.5

1.6

1.2

= 90

< 90

4.72

2.49, 9.19

<0.001

5.33

1.80, 17.28

0.003

3.53

2.43, 5.16

<0.001

2.08

1.21, 3.62

0.009

FiO2 (%)

1.5

1.2

1.3

1.1

21 (O2 therapy)

21

1.02

0.51, 2.07

0.961

0.50

0.17, 1.44

0.203

0.64

0.43, 0.94

0.025

0.54

0.31, 0.93

0.029

PaO2:FiO2 ratio

1.7

1.3

1.3

1.1

200

<= 200

2.46

1.33, 4.62

0.005

0.89

0.32, 2.40

0.817

3.74

2.57, 5.49

<0.001

2.25

1.37, 3.71

0.001

PaO2

1.2

1.1

1.2

1.1

= 60

< 60

4.95

2.29, 11.52

<0.001

3.35

1.17, 10.42

0.029

3.77

2.45, 5.91

<0.001

1.82

1.04, 3.24

0.038

Corticosteroids

1.5

1.2

1.6

1.3

No

Yes

2.13

1.01, 4.72

0.052

2.54

0.82, 8.34

0.113

1.79

1.01, 3.27

0.051

1.04

0.40, 2.67

0.939

Anticoagulants

1.4

1.2

1.8

1.3

No

Yes

0.92

0.35, 2.44

0.867

0.36

0.08, 1.58

0.181

1.76

1.00, 3.17

0.054

0.54

0.19, 1.40

0.213

Antimalarials

1.5

1.2

1.5

1.2

No

Yes

1.23

0.67, 2.27

0.500

0.47

0.18, 1.17

0.116

0.95

0.66, 1.35

0.760

0.48

0.28, 0.81

0.007

Pronation

1.3

1.2

1.2

1.1

Yes

No

0.92

0.50, 1.70

0.797

1.51

0.63, 3.75

0.358

0.90

0.63, 1.28

0.556

0.97

0.60, 1.56

0.888

1OR = Odds Ratio

295% CI = 95% Confidence Interval

3GVIF = Generalized Variance Inflation Factor

4aGVIF = Adjusted GVIF or GVIF^[1/(2*df)]

Multivariate reduced: backward for patients with T2DM

backward_dm <- m1_dm |>
  stats::step(direction = "backward", trace = FALSE)
m2_dm <-
  glm(
    outcome ~ edad.c + hta + taquipnea + frecuencia_cardiaca.c + p_a_diastolica.c +
      neutrofilos.c + saturacion_de_oxigeno.c + pao2.c + antipaludicos,
    family = binomial(link = "logit"),
    data = data_uv_dm
  )

# Visual check of model assumptions
performance::check_model(m2_dm)

# Model performance
broom::glance(m2_dm)
## # A tibble: 1 × 8
##   null.deviance df.null logLik   AIC   BIC deviance df.residual  nobs
##           <dbl>   <int>  <dbl> <dbl> <dbl>    <dbl>       <int> <int>
## 1          233.     168  -86.4  193.  224.     173.         159   169
# Indices of model performance
performance::model_performance(m2_dm)
## # Indices of model performance
## 
## AIC     |    AICc |     BIC | Tjur's R2 |  RMSE | Sigma | Log_loss | Score_log | Score_spherical |   PCP
## --------------------------------------------------------------------------------------------------------
## 192.818 | 194.210 | 224.117 |     0.314 | 0.413 | 1.000 |    0.511 |   -62.196 |           0.009 | 0.659
# Check for multicollinearity
performance::check_collinearity(m2_dm)
## # Check for Multicollinearity
## 
## Low Correlation
## 
##                     Term  VIF   VIF 95% CI Increased SE Tolerance
##                   edad.c 1.10 [1.02, 1.53]         1.05      0.91
##                      hta 1.08 [1.01, 1.63]         1.04      0.93
##                taquipnea 1.04 [1.00, 3.27]         1.02      0.97
##    frecuencia_cardiaca.c 1.14 [1.04, 1.50]         1.07      0.88
##         p_a_diastolica.c 1.05 [1.00, 2.03]         1.03      0.95
##            neutrofilos.c 1.08 [1.01, 1.59]         1.04      0.92
##  saturacion_de_oxigeno.c 1.09 [1.02, 1.55]         1.05      0.91
##                   pao2.c 1.09 [1.01, 1.56]         1.04      0.92
##            antipaludicos 1.24 [1.10, 1.57]         1.11      0.80
##  Tolerance 95% CI
##      [0.65, 0.98]
##      [0.61, 0.99]
##      [0.31, 1.00]
##      [0.67, 0.96]
##      [0.49, 1.00]
##      [0.63, 0.99]
##      [0.65, 0.98]
##      [0.64, 0.99]
##      [0.64, 0.91]

Multivariate reduced: backward for patients without T2DM

backward_non <- m1_non |>
  stats::step(direction = "backward", trace = FALSE)
m2_non <-
  glm(
    outcome ~ edad.c + fever + tos + frecuencia_cardiaca.c + p_a_sistolica.c + 
      linfocitos.c + nlr.c + platelets.c + saturacion_de_oxigeno.c + fio2.c + 
      pafi.c + pao2.c + antipaludicos,
    family = binomial(link = "logit"),
    data = data_uv_nondm
  )

# Visual check of model assumptions
performance::check_model(m2_non)

# Model performance
broom::glance(m2_non)
## # A tibble: 1 × 8
##   null.deviance df.null logLik   AIC   BIC deviance df.residual  nobs
##           <dbl>   <int>  <dbl> <dbl> <dbl>    <dbl>       <int> <int>
## 1          675.     486  -250.  527.  586.     499.         473   487
# Indices of model performance
performance::model_performance(m2_non)
## # Indices of model performance
## 
## AIC     |    AICc |     BIC | Tjur's R2 |  RMSE | Sigma | Log_loss | Score_log | Score_spherical |   PCP
## --------------------------------------------------------------------------------------------------------
## 527.285 | 528.175 | 585.921 |     0.321 | 0.411 | 1.000 |    0.513 |      -Inf |           0.003 | 0.661
# Check for multicollinearity
performance::check_collinearity(m2_non)
## # Check for Multicollinearity
## 
## Low Correlation
## 
##                     Term  VIF   VIF 95% CI Increased SE Tolerance
##                   edad.c 1.05 [1.01, 1.35]         1.02      0.96
##                    fever 1.11 [1.04, 1.28]         1.05      0.90
##                      tos 1.17 [1.09, 1.33]         1.08      0.85
##    frecuencia_cardiaca.c 1.08 [1.02, 1.28]         1.04      0.93
##          p_a_sistolica.c 1.03 [1.00, 1.62]         1.01      0.97
##             linfocitos.c 1.26 [1.16, 1.43]         1.12      0.79
##                    nlr.c 1.28 [1.17, 1.45]         1.13      0.78
##              platelets.c 1.27 [1.16, 1.44]         1.13      0.79
##  saturacion_de_oxigeno.c 1.40 [1.27, 1.59]         1.18      0.71
##                   fio2.c 1.18 [1.09, 1.34]         1.09      0.85
##                   pafi.c 1.19 [1.10, 1.36]         1.09      0.84
##                   pao2.c 1.14 [1.06, 1.30]         1.07      0.88
##            antipaludicos 1.33 [1.21, 1.51]         1.15      0.75
##  Tolerance 95% CI
##      [0.74, 0.99]
##      [0.78, 0.96]
##      [0.75, 0.92]
##      [0.78, 0.98]
##      [0.62, 1.00]
##      [0.70, 0.86]
##      [0.69, 0.85]
##      [0.70, 0.86]
##      [0.63, 0.79]
##      [0.74, 0.92]
##      [0.74, 0.91]
##      [0.77, 0.94]
##      [0.66, 0.82]

Parsimonious model for patients with T2DM

In these models, only those variables that were statistically significant in previous analyses or that are clinically relevant according to the available evidence will be included. In subsequent analyses, some of the following variables may be excluded due to the potential for multicollinearity and to avoid overfitting: p_a_sistolica.c, p_a_diastolic.c, wbc.c, neutrofilos.c, lymphocytes.c, nlr.c, fio2.c, and pao2.c. The exclusion of fever and cough is based on the observation that these symptoms lack specificity and are commonly observed in the context of most infections.

# Parsimonious formula
m3_dm <-
  glm(
    outcome ~ edad.c + hta + disnea + frecuencia_cardiaca.c + neutrofilos.c +
      saturacion_de_oxigeno.c + pafi.c,
    family = binomial(link = "logit"),
    data = data_uv_dm
  )

# Visual check of model assumptions
performance::check_model(m3_dm)

# Model performance
broom::glance(m3_dm)
## # A tibble: 1 × 8
##   null.deviance df.null logLik   AIC   BIC deviance df.residual  nobs
##           <dbl>   <int>  <dbl> <dbl> <dbl>    <dbl>       <int> <int>
## 1          233.     168  -93.5  203.  228.     187.         161   169
# Indices of model performance
performance::model_performance(m3_dm)
## # Indices of model performance
## 
## AIC     |    AICc |     BIC | Tjur's R2 |  RMSE | Sigma | Log_loss | Score_log | Score_spherical |   PCP
## --------------------------------------------------------------------------------------------------------
## 203.064 | 203.964 | 228.104 |     0.244 | 0.434 | 1.000 |    0.553 |   -55.236 |           0.010 | 0.625
# Check for multicollinearity
performance::check_collinearity(m3_dm)
## # Check for Multicollinearity
## 
## Low Correlation
## 
##                     Term  VIF   VIF 95% CI Increased SE Tolerance
##                   edad.c 1.07 [1.01, 1.74]         1.03      0.94
##                      hta 1.09 [1.01, 1.58]         1.04      0.92
##                   disnea 1.03 [1.00, 8.87]         1.01      0.97
##    frecuencia_cardiaca.c 1.06 [1.01, 1.78]         1.03      0.94
##            neutrofilos.c 1.05 [1.00, 2.38]         1.02      0.96
##  saturacion_de_oxigeno.c 1.32 [1.15, 1.66]         1.15      0.76
##                   pafi.c 1.29 [1.14, 1.63]         1.14      0.77
##  Tolerance 95% CI
##      [0.58, 0.99]
##      [0.63, 0.99]
##      [0.11, 1.00]
##      [0.56, 0.99]
##      [0.42, 1.00]
##      [0.60, 0.87]
##      [0.61, 0.88]

Parsimonious model for patients without T2DM

m3_non <-
  glm(
    outcome ~ edad.c + hta + disnea + frecuencia_cardiaca.c + nlr.c +
      saturacion_de_oxigeno.c + pafi.c,
    family = binomial(link = "logit"),
    data = data_uv_nondm
  )

# Visual check of model assumptions
performance::check_model(m3_non)

# Model performance
broom::glance(m3_non)
## # A tibble: 1 × 8
##   null.deviance df.null logLik   AIC   BIC deviance df.residual  nobs
##           <dbl>   <int>  <dbl> <dbl> <dbl>    <dbl>       <int> <int>
## 1          675.     486  -270.  555.  589.     539.         479   487
# Indices of model performance
performance::model_performance(m3_non)
## # Indices of model performance
## 
## AIC     |    AICc |     BIC | Tjur's R2 |  RMSE | Sigma | Log_loss | Score_log | Score_spherical |   PCP
## --------------------------------------------------------------------------------------------------------
## 555.007 | 555.308 | 588.513 |     0.253 | 0.432 | 1.000 |    0.553 |  -211.843 |           0.006 | 0.626
# Check for multicollinearity
performance::check_collinearity(m3_non)
## # Check for Multicollinearity
## 
## Low Correlation
## 
##                     Term  VIF   VIF 95% CI Increased SE Tolerance
##                   edad.c 1.05 [1.01, 1.33]         1.03      0.95
##                      hta 1.06 [1.01, 1.30]         1.03      0.94
##                   disnea 1.07 [1.02, 1.29]         1.03      0.94
##    frecuencia_cardiaca.c 1.05 [1.01, 1.36]         1.02      0.96
##                    nlr.c 1.05 [1.01, 1.34]         1.02      0.95
##  saturacion_de_oxigeno.c 1.14 [1.07, 1.31]         1.07      0.88
##                   pafi.c 1.10 [1.03, 1.28]         1.05      0.91
##  Tolerance 95% CI
##      [0.75, 0.99]
##      [0.77, 0.99]
##      [0.78, 0.98]
##      [0.74, 0.99]
##      [0.75, 0.99]
##      [0.76, 0.94]
##      [0.78, 0.97]

Final model for patients with T2DM

table_5.1_uv <- data_uv_dm |>
  tbl_uvregression(
    include = c(
      edad.c,
      hta,
      disnea,
      frecuencia_cardiaca.c,
      neutrofilos.c,
      saturacion_de_oxigeno.c,
      pafi.c
    ),
    y = outcome,
    method = glm,
    method.args = list(family = binomial),
    exponentiate = TRUE,
    conf.int = TRUE,
    hide_n = TRUE,
    add_estimate_to_reference_rows = FALSE,
    pvalue_fun = ~ style_pvalue(.x, digits = 3),
    estimate_fun = ~ style_number(.x, digits = 2),
    label = list(
      edad.c ~ "Age (years)",
      hta ~ "Hypertension",
      disnea ~ "Dyspnea",
      frecuencia_cardiaca.c ~ "Heart rate (BPM)",
      neutrofilos.c ~ "Neutrophils (×10^−9^/L)",
      saturacion_de_oxigeno.c ~ "SaO~2~",
      pafi.c ~ "PaO~2~:FiO~2~ ratio"
    )
  ) |>
  bold_labels() |>
  bold_p(t = 0.05) |>
  modify_header(estimate = "**Univariate OR**", p.value = "**p value**")
table_5.1_mv <-
  glm(
    outcome ~ edad.c + hta + disnea + frecuencia_cardiaca.c + neutrofilos.c +
      saturacion_de_oxigeno.c + pafi.c,
    family = binomial(link = "logit"),
    data = data_uv_dm
  ) |>
  tbl_regression(
    conf.int = TRUE,
    exponentiate = TRUE,
    pvalue_fun = ~ style_pvalue(.x, digits = 3),
    estimate_fun = ~ style_number(.x, digits = 2),
    label = list(
      edad.c ~ "Age (years)",
      hta ~ "Hypertension",
      disnea ~ "Dyspnea",
      frecuencia_cardiaca.c ~ "Heart rate (BPM)",
      neutrofilos.c ~ "Neutrophils (×10^−9^/L)",
      saturacion_de_oxigeno.c ~ "SaO~2~",
      pafi.c ~ "PaO~2~:FiO~2~ ratio"
    )
  ) |>
  bold_p(t = 0.05) |>
  modify_header(estimate = "**Multivariable OR**", p.value = "**p value** ")

Final model for patients without T2DM

table_5.2_uv <- data_uv_nondm |>
  tbl_uvregression(
    include = c(
      edad.c,
      hta,
      disnea,
      frecuencia_cardiaca.c,
      neutrofilos.c,
      saturacion_de_oxigeno.c,
      pafi.c
    ),
    y = outcome,
    method = glm,
    method.args = list(family = binomial),
    exponentiate = TRUE,
    conf.int = TRUE,
    hide_n = TRUE,
    add_estimate_to_reference_rows = FALSE,
    pvalue_fun = ~ style_pvalue(.x, digits = 3),
    estimate_fun = ~ style_number(.x, digits = 2),
    label = list(
      edad.c ~ "Age (years)",
      hta ~ "Hypertension",
      disnea ~ "Dyspnea",
      frecuencia_cardiaca.c ~ "Heart rate (BPM)",
      neutrofilos.c ~ "Neutrophils (×10^−9^/L)",
      saturacion_de_oxigeno.c ~ "SaO~2~",
      pafi.c ~ "PaO~2~:FiO~2~ ratio"
    )
  ) |>
  bold_labels() |>
  bold_p(t = 0.05) |>
  modify_header(estimate = "**Univariate OR**", p.value = "**p value**")
table_5.2_mv <-
  glm(
    outcome ~ edad.c + hta + disnea + frecuencia_cardiaca.c + neutrofilos.c +
      saturacion_de_oxigeno.c + pafi.c,
    family = binomial(link = "logit"),
    data = data_uv_nondm
  ) |>
  tbl_regression(
    conf.int = TRUE,
    exponentiate = TRUE,
    pvalue_fun = ~ style_pvalue(.x, digits = 3),
    estimate_fun = ~ style_number(.x, digits = 2),
    label = list(
      edad.c ~ "Age (years)",
      hta ~ "Hypertension",
      disnea ~ "Dyspnea",
      frecuencia_cardiaca.c ~ "Heart rate (BPM)",
      neutrofilos.c ~ "Neutrophils (×10^−9^/L)",
      saturacion_de_oxigeno.c ~ "SaO~2~",
      pafi.c ~ "PaO~2~:FiO~2~ ratio"
    )
  ) |>
  bold_p(t = 0.05) |>
  modify_header(estimate = "**Multivariable OR**", p.value = "**p value** ")
# Number of observations
count.dm <- nrow(data_uv_dm)
count.non <- nrow(data_uv_nondm)

# Merge tables
table_5 <-
  tbl_merge(tbls = list(table_5.1_uv, table_5.1_mv,
                        table_5.2_uv, table_5.2_mv)) |>
  modify_spanning_header(list(
    c(estimate_1:p.value_2) ~ "**Patients with T2DM** (N = {paste(count.dm)})",
    c(estimate_3:p.value_4) ~ "**Patients without T2DM** (N = {paste(count.non)})"
  )) |>
  modify_footnote(everything() ~ NA, abbreviation = TRUE) |>
  modify_footnote(
    c(estimate_1, estimate_2, estimate_3, estimate_4) ~ "OR = Odds Ratio",
    c(ci_1, ci_2, ci_3, ci_4) ~ "95% CI = 95% Confidence Interval"
  ) |>
  modify_caption("Table 5. Multivariable logistic regression by T2DM")

# Convert gtsummary object to a flextable object and format character columns
flex_table_5 <- table_5 |>
  gtsummary::as_flex_table() |>
  my_flextable_theme() |>
  flextable::fontsize(part = "all", size = 10) |>
  ftExtra::colformat_md()

# View
flex_table_5
Table 5. Multivariable logistic regression by T2DM

Patients with T2DM (N = 169)

Patients without T2DM (N = 487)

Characteristic

Univariate OR1

95% CI2

p value

Multivariable OR1

95% CI2

p value

Univariate OR1

95% CI2

p value

Multivariable OR1

95% CI2

p value

Age (years)

< 61

= 61

2.46

1.33, 4.62

0.005

2.18

1.06, 4.53

0.034

3.36

2.32, 4.88

<0.001

3.44

2.29, 5.24

<0.001

Hypertension

No

Yes

1.91

1.02, 3.60

0.044

1.95

0.93, 4.18

0.079

1.35

0.87, 2.11

0.182

0.99

0.60, 1.66

0.981

Dyspnea

No

Yes

3.75

1.42, 11.79

0.013

2.07

0.68, 7.20

0.217

2.84

1.60, 5.28

<0.001

1.60

0.81, 3.23

0.180

Heart rate (BPM)

< 100

= 100

2.31

1.23, 4.39

0.010

2.06

1.00, 4.35

0.053

1.91

1.33, 2.74

<0.001

1.77

1.17, 2.68

0.007

Neutrophils (×10−9/L)

<= 6.3

6.3

4.03

1.84, 9.60

<0.001

3.19

1.34, 8.22

0.011

2.20

1.47, 3.31

<0.001

1.56

0.98, 2.49

0.060

SaO2

= 90

< 90

4.72

2.49, 9.19

<0.001

2.94

1.33, 6.66

0.008

3.53

2.43, 5.16

<0.001

2.00

1.30, 3.09

0.002

PaO2:FiO2 ratio

200

<= 200

2.46

1.33, 4.62

0.005

1.25

0.55, 2.74

0.587

3.74

2.57, 5.49

<0.001

2.57

1.67, 3.97

<0.001

1OR = Odds Ratio

295% CI = 95% Confidence Interval

Model comparison

T2DM patient models

# Compare performance of different models
compare_performance(m1_dm, m2_dm, m3_dm, verbose = FALSE)
## # Comparison of Model Performance Indices
## 
## Name  | Model | AIC (weights) | AICc (weights) | BIC (weights) | Tjur's R2 |  RMSE | Sigma | Log_loss | Score_log | Score_spherical |   PCP
## -------------------------------------------------------------------------------------------------------------------------------------------
## m1_dm |   glm | 216.3 (<.001) |  227.9 (<.001) | 303.9 (<.001) |     0.366 | 0.399 | 1.000 |    0.474 |   -69.168 |           0.011 | 0.686
## m2_dm |   glm | 192.8 (0.994) |  194.2 (0.992) | 224.1 (0.880) |     0.314 | 0.413 | 1.000 |    0.511 |   -62.196 |           0.009 | 0.659
## m3_dm |   glm | 203.1 (0.006) |  204.0 (0.008) | 228.1 (0.120) |     0.244 | 0.434 | 1.000 |    0.553 |   -55.236 |           0.010 | 0.625
# Radar plot
plot(compare_performance(m1_dm, m2_dm, m3_dm, rank = TRUE, verbose = FALSE))

# Likelihood Ratio Test
lmtest::lrtest(m2_dm, m3_dm)
## Likelihood ratio test
## 
## Model 1: outcome ~ edad.c + hta + taquipnea + frecuencia_cardiaca.c + 
##     p_a_diastolica.c + neutrofilos.c + saturacion_de_oxigeno.c + 
##     pao2.c + antipaludicos
## Model 2: outcome ~ edad.c + hta + disnea + frecuencia_cardiaca.c + neutrofilos.c + 
##     saturacion_de_oxigeno.c + pafi.c
##   #Df  LogLik Df  Chisq Pr(>Chisq)    
## 1  10 -86.409                         
## 2   8 -93.532 -2 14.247  0.0008061 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

non-T2DM patient models

# Compare performance of different models
compare_performance(m1_non, m2_non, m3_non, verbose = FALSE)
## # Comparison of Model Performance Indices
## 
## Name   | Model | AIC (weights) | AICc (weights) | BIC (weights) | Tjur's R2 |  RMSE | Sigma | Log_loss | Score_log | Score_spherical |   PCP
## --------------------------------------------------------------------------------------------------------------------------------------------
## m1_non |   glm | 543.0 (<.001) |  546.6 (<.001) | 660.3 (<.001) |     0.341 | 0.405 | 1.000 |    0.500 |      -Inf |           0.003 | 0.670
## m2_non |   glm | 527.3 (>.999) |  528.2 (>.999) | 585.9 (0.785) |     0.321 | 0.411 | 1.000 |    0.513 |      -Inf |           0.003 | 0.661
## m3_non |   glm | 555.0 (<.001) |  555.3 (<.001) | 588.5 (0.215) |     0.253 | 0.432 | 1.000 |    0.553 |  -211.843 |           0.006 | 0.626
# Radar plot
plot(compare_performance(m1_non, m2_non, m3_non, rank = TRUE, verbose = FALSE))

# Likelihood Ratio Test
lmtest::lrtest(m2_non, m3_non)
## Likelihood ratio test
## 
## Model 1: outcome ~ edad.c + fever + tos + frecuencia_cardiaca.c + p_a_sistolica.c + 
##     linfocitos.c + nlr.c + platelets.c + saturacion_de_oxigeno.c + 
##     fio2.c + pafi.c + pao2.c + antipaludicos
## Model 2: outcome ~ edad.c + hta + disnea + frecuencia_cardiaca.c + nlr.c + 
##     saturacion_de_oxigeno.c + pafi.c
##   #Df  LogLik Df  Chisq Pr(>Chisq)    
## 1  14 -249.64                         
## 2   8 -269.50 -6 39.721  5.167e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Sensitivity analysis

Sensitivity to continuous data categorization

In this sensitivity analysis, the analyses will be repeated with continuous data to ensure that there are no differences in the conclusions. While the categorization of continuous variables has the disadvantage of loss of information, it has the advantage of assuming linearity and being easily interpreted by the non-specialized public.

# Patients with T2DM
data_sens_dm <- var_selec_sens(data_match, t2dm, "yes")

# Patients without T2DM
data_sens_nondm <- var_selec_sens(data_match, t2dm, "no")

Patients with T2DM

Univariate regression analysis

table_sens_uv_dm <- data_sens_dm |>
  tbl_uvregression(
    include = c(edad:pao2),
    y = outcome,
    method = glm,
    method.args = list(family = binomial),
    exponentiate = TRUE,
    conf.int = TRUE,
    hide_n = TRUE,
    add_estimate_to_reference_rows = FALSE,
    pvalue_fun = ~ style_pvalue(.x, digits = 3),
    estimate_fun = ~ style_number(.x, digits = 2),
    label = list(
      edad ~ "Age (years)",
      hta ~ "Hypertension",
      disnea ~ "Dyspnea",
      frecuencia_cardiaca ~ "Heart rate (BPM)",
      frecuencia_respiratoria ~ "Respiratory rate (BPM)",
      p_a_sistolica ~ "SBP (mmHg)",
      p_a_diastolica ~ "DBP (mmHg)",
      wbc ~ "White-cells (×10^−9^/L)",
      neutrofilos ~ "Neutrophils (×10^−9^/L)",
      linfocitos ~ "Lymphocytes (×10^−9^/L)",
      nlr ~ "NLR",
      platelets ~ "Platelets (×10^−9^/L)",
      saturacion_de_oxigeno ~ "SaO~2~",
      fio2 ~ "FiO~2~ (%)",
      pafi ~ "PaO~2~:FiO~2~ ratio",
      pao2 ~ "PaO~2~"
    )
  ) |>
  bold_labels() |>
  bold_p(t = 0.05) |>
  modify_header(estimate = "**Univariate OR**", p.value = "**p value**")

Multivariable regression analysis

table_sens_mv_dm <-
  glm(
    outcome ~ edad + hta + disnea + frecuencia_cardiaca + neutrofilos +
      saturacion_de_oxigeno + pafi,
    family = binomial(link = "logit"),
    data = data_sens_dm
  ) |>
  tbl_regression(
    conf.int = TRUE,
    exponentiate = TRUE,
    pvalue_fun = ~ style_pvalue(.x, digits = 3),
    estimate_fun = ~ style_number(.x, digits = 2),
    label = list(
      edad ~ "Age (years)",
      hta ~ "Hypertension",
      disnea ~ "Dyspnea",
      frecuencia_cardiaca ~ "Heart rate (BPM)",
      neutrofilos ~ "Neutrophils (×10^−9^/L)",
      saturacion_de_oxigeno ~ "SaO~2~",
      pafi ~ "PaO~2~:FiO~2~ ratio"
    )
  ) |>
  bold_p(t = 0.05) |>
  modify_header(estimate = "**Multivariable OR**", p.value = "**p value** ")

Patients without T2DM

Univariate regression analysis with continuous data

table_sens_uv_non <- data_sens_nondm |>
  tbl_uvregression(
    include = c(edad:pao2),
    y = outcome,
    method = glm,
    method.args = list(family = binomial),
    exponentiate = TRUE,
    conf.int = TRUE,
    hide_n = TRUE,
    add_estimate_to_reference_rows = FALSE,
    pvalue_fun = ~ style_pvalue(.x, digits = 3),
    estimate_fun = ~ style_number(.x, digits = 2),
    label = list(
      edad ~ "Age (years)",
      hta ~ "Hypertension",
      disnea ~ "Dyspnea",
      frecuencia_cardiaca ~ "Heart rate (BPM)",
      frecuencia_respiratoria ~ "Respiratory rate (BPM)",
      p_a_sistolica ~ "SBP (mmHg)",
      p_a_diastolica ~ "DBP (mmHg)",
      wbc ~ "White-cells (×10^−9^/L)",
      neutrofilos ~ "Neutrophils (×10^−9^/L)",
      linfocitos ~ "Lymphocytes (×10^−9^/L)",
      nlr ~ "NLR",
      platelets ~ "Platelets (×10^−9^/L)",
      saturacion_de_oxigeno ~ "SaO~2~",
      fio2 ~ "FiO~2~ (%)",
      pafi ~ "PaO~2~:FiO~2~ ratio",
      pao2 ~ "PaO~2~"
    )
  ) |>
  bold_labels() |>
  bold_p(t = 0.05) |>
  modify_header(estimate = "**Univariate OR**", p.value = "**p value**")

Multivariable regression analysis with continuous data

table_sens_mv_non <-
  glm(
    outcome ~ edad + hta + disnea + frecuencia_cardiaca + neutrofilos +
      saturacion_de_oxigeno + pafi,
    family = binomial(link = "logit"),
    data = data_sens_nondm
  ) |>
  tbl_regression(
    conf.int = TRUE,
    exponentiate = TRUE,
    pvalue_fun = ~ style_pvalue(.x, digits = 3),
    estimate_fun = ~ style_number(.x, digits = 2),
    label = list(
      edad ~ "Age (years)",
      hta ~ "Hypertension",
      disnea ~ "Dyspnea",
      frecuencia_cardiaca ~ "Heart rate (BPM)",
      neutrofilos ~ "Neutrophils (×10^−9^/L)",
      saturacion_de_oxigeno ~ "SaO~2~",
      pafi ~ "PaO~2~:FiO~2~ ratio"
    )
  ) |>
  bold_p(t = 0.05) |>
  modify_header(estimate = "**Multivariable OR**", p.value = "**p value** ")
# Number of observations
counts.dm <- nrow(data_sens_dm)
counts.non <- nrow(data_sens_nondm)

# Merge tables
table_s2 <-
  tbl_merge(tbls = list(
    table_sens_uv_dm,
    table_sens_mv_dm,
    table_sens_uv_non,
    table_sens_mv_non
  )) |>
  modify_spanning_header(list(
    c(estimate_1:p.value_2) ~ "**Patients with T2DM** (N = {paste(counts.dm)})",
    c(estimate_3:p.value_4) ~ "**Patients without T2DM** (N = {paste(counts.non)})"
  )) |>
  modify_footnote(everything() ~ NA, abbreviation = TRUE) |>
  modify_footnote(
    c(estimate_1, estimate_2, estimate_3, estimate_4) ~ "OR = Odds Ratio",
    c(ci_1, ci_2, ci_3, ci_4) ~ "95% CI = 95% Confidence Interval"
  ) |>
  modify_caption("Table S2. Sensitivity to continuous data categorization")

# Convert gtsummary object to a flextable object and format character columns
flex_table_s2 <- table_s2 |>
  gtsummary::as_flex_table() |>
  my_flextable_theme() |>
  flextable::fontsize(part = "all", size = 10) |>
  ftExtra::colformat_md()

# View
flex_table_s2
Table S2. Sensitivity to continuous data categorization

Patients with T2DM (N = 168)

Patients without T2DM (N = 446)

Characteristic

Univariate OR1

95% CI2

p value

Multivariable OR1

95% CI2

p value

Univariate OR1

95% CI2

p value

Multivariable OR1

95% CI2

p value

Age (years)

1.04

1.01, 1.07

0.005

1.03

1.00, 1.07

0.046

1.06

1.04, 1.08

<0.001

1.06

1.04, 1.08

<0.001

Hypertension

No

Yes

1.95

1.04, 3.68

0.038

1.72

0.80, 3.78

0.168

1.40

0.88, 2.21

0.153

0.92

0.54, 1.58

0.767

Dyspnea

No

Yes

6.09

1.99, 26.60

0.005

4.28

1.20, 21.01

0.041

1.44

0.77, 2.77

0.264

0.82

0.39, 1.77

0.614

Heart rate (BPM)

1.03

1.01, 1.05

0.004

1.02

1.00, 1.04

0.121

1.03

1.02, 1.04

<0.001

1.03

1.01, 1.04

<0.001

Respiratory rate (BPM)

1.03

0.98, 1.08

0.247

1.08

1.04, 1.11

<0.001

SBP (mmHg)

1.01

1.00, 1.03

0.142

1.00

0.99, 1.01

0.786

DBP (mmHg)

1.02

0.99, 1.05

0.129

0.99

0.97, 1.01

0.176

White-cells (×10−9/L)

1.10

1.04, 1.17

0.002

1.07

1.03, 1.11

<0.001

Neutrophils (×10−9/L)

1.12

1.06, 1.19

<0.001

1.10

1.03, 1.19

0.007

1.09

1.05, 1.13

<0.001

1.06

1.01, 1.11

0.014

Lymphocytes (×10−9/L)

0.48

0.25, 0.87

0.020

0.67

0.50, 0.86

0.003

NLR

1.05

1.02, 1.08

<0.001

1.04

1.02, 1.06

<0.001

Platelets (×10−9/L)

1.00

1.00, 1.01

0.040

1.00

1.00, 1.01

<0.001

SaO2

0.91

0.86, 0.95

<0.001

0.94

0.90, 0.98

0.013

0.93

0.91, 0.95

<0.001

0.96

0.94, 0.98

0.001

FiO2 (%)

1.01

0.99, 1.03

0.260

1.02

1.01, 1.03

<0.001

PaO2:FiO2 ratio

0.99

0.99, 1.00

<0.001

1.00

0.99, 1.00

0.227

0.99

0.99, 1.00

<0.001

1.00

0.99, 1.00

<0.001

PaO2

1.00

0.99, 1.01

0.711

0.99

0.98, 1.00

<0.001

1OR = Odds Ratio

295% CI = 95% Confidence Interval

Sensitivity to outliers and influential observations

This is an analysis of sensitivity to outliers and/or influential observations, which may involve removing those observations, evaluating the effects in the model, and re-evaluating the remaining observations.

Patients with T2DM

Outlier identification

final_model_dm <-
  glm(
    outcome ~ edad.c + hta + disnea + frecuencia_cardiaca.c + neutrofilos.c +
      saturacion_de_oxigeno.c + pafi.c,
    family = binomial(link = "logit"),
    data = data_uv_dm
  )

# Outlier test
car::outlierTest(final_model_dm, n.max = Inf)
## No Studentized residuals with Bonferroni p < 0.05
## Largest |rstudent|:
##     rstudent unadjusted p-value Bonferroni p
## 144 2.095873           0.036093           NA
# Extract Studentized residuals
rstudent.dm <- rstudent(final_model_dm)

# Use numeric cutoff from outlier test to identify outliers
outliers.dm <- rstudent.dm > 2
rstudent.dm[outliers.dm]
##      144 
## 2.095873
# Identify outlier observations
data_uv_nondm[c("144"),
              c("edad.c", "hta", "disnea", "frecuencia_cardiaca.c", 
                "neutrofilos.c", "saturacion_de_oxigeno.c", "pafi.c")]
##     edad.c hta disnea frecuencia_cardiaca.c neutrofilos.c
## 144  >= 61  No    Yes                 < 100         > 6.3
##     saturacion_de_oxigeno.c pafi.c
## 144                   >= 90  > 200
# Plot Studentized residuals vs. fitted values
car::residualPlots(
  final_model_dm,
  pch = 20,
  smooth = list(col = "tomato"),
  col = "gray55",
  fitted = TRUE,
  terms = ~ 1,
  tests = FALSE,
  quadratic = FALSE,
  type = "rstudent"
)

# Highlight outliers
points(fitted(final_model_dm)[outliers.dm],
       rstudent.dm[outliers.dm],
       pch = 20,
       cex = 2)

Influential observations identification

# Studentized residuals and hat values
car::influenceIndexPlot(
  final_model_dm,
  vars = c("Studentized", "hat"),
  id = TRUE,
  main = "Residuals and leverage"
)

# Cook’s distance
car::influenceIndexPlot(final_model_dm,
                        vars = "Cook",
                        id = TRUE,
                        main = "Cook's distance")

# Extract Cook’s distance
cook.dm <- cooks.distance(final_model_dm)

# Subjective cutoff from figure
sub.cook.dm <- cook.dm  > 0.03

# View the extreme Cook's distance values
cook.dm[sub.cook.dm]
##         55        130        144        175 
## 0.03871005 0.03399731 0.03513536 0.04671050
# Extract DFBetas
dfbetas.dm <- dfbetas(final_model_dm)

# Standard cutoff for DFBeta
sub.dfbetas.dm <- apply(abs(dfbetas.dm) > 0.2, 1, any)

# View the extreme DFBetas
dfbetas.dm[sub.dfbetas.dm, ]
##      (Intercept) edad.c>= 61      htaYes    disneaYes
## 8   -0.130008969 -0.09279003  0.05002304  0.208642194
## 19   0.047849740 -0.14080886  0.10369708 -0.041209696
## 30   0.189461311  0.04125722  0.14512184 -0.307719583
## 34  -0.006940373 -0.16133180  0.16832707  0.042690085
## 41  -0.213426706 -0.13255231  0.06279178  0.367124249
## 43   0.057075664  0.13401314 -0.11596697  0.025841915
## 45   0.125220698 -0.13393548  0.13906262  0.034623250
## 55   0.297608545  0.11749403 -0.11084920 -0.335358840
## 57   0.134986489 -0.09275170 -0.09872224  0.027805535
## 59   0.071901713  0.06698219  0.08388994  0.021990603
## 67   0.189461311  0.04125722  0.14512184 -0.307719583
## 91   0.031769758 -0.05669880 -0.13972302 -0.035937424
## 116 -0.014260150  0.13477451 -0.13577581  0.069877931
## 120  0.066071618 -0.11709781  0.14279326  0.002198781
## 129  0.225978483 -0.10013977 -0.01189591 -0.365742955
## 130  0.180328681  0.17911699 -0.07760427 -0.373707081
## 144  0.179883758  0.12046877 -0.18479759  0.078495104
## 160  0.084222363  0.14378608 -0.21503492 -0.001521023
## 171 -0.022166784 -0.07543358 -0.06974149  0.046411032
## 175  0.256839466 -0.15179389 -0.13467646  0.059019627
## 182  0.052141315 -0.11929261 -0.09826788  0.068799379
##     frecuencia_cardiaca.c>= 100 neutrofilos.c> 6.3 saturacion_de_oxigeno.c< 90
## 8                   -0.09586931        -0.03624550                  0.04468346
## 19                  -0.12012249        -0.02216533                 -0.18113843
## 30                  -0.06942338         0.04545261                  0.19490735
## 34                  -0.16718123         0.10885870                 -0.18504636
## 41                  -0.14915548        -0.02307813                 -0.20792355
## 43                   0.07281406        -0.23502076                  0.07722299
## 45                   0.10794495        -0.21906737                 -0.02523951
## 55                  -0.10217997         0.05452558                 -0.06686461
## 57                   0.04903130        -0.27927467                  0.11280709
## 59                  -0.12361615        -0.21348456                  0.10280691
## 67                  -0.06942338         0.04545261                  0.19490735
## 91                   0.11790123        -0.05484881                 -0.21013120
## 116                 -0.14993685         0.06284712                 -0.21834274
## 120                  0.06414324        -0.20680163                  0.11349058
## 129                  0.08529858         0.03868793                  0.09689404
## 130                  0.14429957         0.07569299                 -0.21007695
## 144                 -0.13302250        -0.25299085                 -0.05121188
## 160                 -0.08400667        -0.07909338                 -0.09079689
## 171                  0.06573779         0.07413775                 -0.20620594
## 175                 -0.19196585        -0.30296227                  0.25248953
## 182                 -0.19486393         0.07029045                 -0.20634362
##     pafi.c<= 200
## 8     0.04286161
## 19    0.21768302
## 30   -0.14066688
## 34    0.20817704
## 41    0.19844725
## 43    0.04360648
## 45   -0.10928865
## 55   -0.05275721
## 57    0.03735786
## 59    0.06863060
## 67   -0.14066688
## 91    0.18936265
## 116   0.21019303
## 120   0.03948229
## 129   0.06328536
## 130   0.21865446
## 144  -0.07636639
## 160  -0.06015870
## 171   0.17379872
## 175  -0.22710604
## 182   0.20189483
# Put it all together
influential_obs.dm <- outliers.dm | sub.cook.dm | sub.dfbetas.dm
sum(influential_obs.dm)
## [1] 21

# Identify influential observations
influential_obs.dm.true <- influential_obs.dm[influential_obs.dm]
influential_obs.dm.true
##    8   19   30   34   41   43   45   55   57   59   67   91  116  120  129  130 
## TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE 
##  144  160  171  175  182 
## TRUE TRUE TRUE TRUE TRUE

Patients without T2DM

Outlier identification

final_model_non <-
  glm(
    outcome ~ edad.c + hta + disnea + frecuencia_cardiaca.c + neutrofilos.c +
      saturacion_de_oxigeno.c + pafi.c,
    family = binomial(link = "logit"),
    data = data_uv_nondm
  )

# Outlier test
car::outlierTest(final_model_non, n.max = Inf)
## No Studentized residuals with Bonferroni p < 0.05
## Largest |rstudent|:
##     rstudent unadjusted p-value Bonferroni p
## 560 2.066517            0.03878           NA
# Extract Studentized residuals
rstudent.non <- rstudent(final_model_non)

# Extract Studentized residuals
rstudent.non <- rstudent(final_model_non)

# Use numeric cutoff from outlier test to identify outliers
outliers.non <- rstudent.non > 2
rstudent.non[outliers.non]
##      560      608 
## 2.066517 2.066517
# Identify outlier observations
data_uv_nondm[c("560", "608"),
              c("edad.c", "hta", "disnea", "frecuencia_cardiaca.c", 
                "neutrofilos.c", "saturacion_de_oxigeno.c", "pafi.c")]
##     edad.c hta disnea frecuencia_cardiaca.c neutrofilos.c
## 560   < 61  No     No                >= 100        <= 6.3
## 608   < 61  No     No                >= 100        <= 6.3
##     saturacion_de_oxigeno.c pafi.c
## 560                   >= 90  > 200
## 608                   >= 90  > 200
# Plot Studentized residuals vs. fitted values
car::residualPlots(
  final_model_non,
  pch = 20,
  smooth = list(col = "tomato"),
  col = "gray55",
  fitted = TRUE,
  terms = ~ 1,
  tests = FALSE,
  quadratic = FALSE,
  type = "rstudent"
)

# Highlight outliers
points(fitted(final_model_non)[outliers.non],
       rstudent.non[outliers.non],
       pch = 20,
       cex = 2)

Influential observations identification

# Studentized residuals and hat values
car::influenceIndexPlot(
  final_model_non,
  vars = c("Studentized", "hat"),
  id = TRUE,
  main = "Residuals and leverage"
)

# Cook’s distance
car::influenceIndexPlot(final_model_non,
                        vars = "Cook",
                        id = TRUE,
                        main = "Cook's distance")

# Extract Cook’s distance
cook.non <- cooks.distance(final_model_non)

# Subjective cutoff from figure
sub.cook.non <- cook.non > 0.015

# View the extreme Cook's distance values
cook.non[sub.cook.non]
##        531        560        608 
## 0.01540577 0.01529497 0.01529497
# Extract DFBetas
dfbetas.non <- dfbetas(final_model_non)

# Standard cutoff for DFBeta
sub.dfbetas.non <- apply(abs(dfbetas.non) > 0.2, 1, any)

# View the extreme DFBetas
dfbetas.non[sub.dfbetas.non, ]
##     (Intercept) edad.c>= 61       htaYes  disneaYes frecuencia_cardiaca.c>= 100
## 101  0.17354126 -0.06797639 -0.005221852 -0.2242761                  0.05043184
## 215  0.17287667 -0.08088673  0.128648522 -0.2101026                  0.04298172
## 323 -0.09993117 -0.08862408  0.037752828  0.2267705                 -0.07583293
## 421 -0.15175024 -0.08619842  0.063139853  0.2110900                 -0.08614098
## 427  0.21867513  0.05602203 -0.040821212 -0.1818798                 -0.04506834
## 531  0.19516203 -0.08397449  0.139217328 -0.2030830                  0.06486495
## 560  0.21453037 -0.06220901 -0.029694768 -0.1684767                  0.07235161
## 608  0.21453037 -0.06220901 -0.029694768 -0.1684767                  0.07235161
##     neutrofilos.c> 6.3 saturacion_de_oxigeno.c< 90 pafi.c<= 200
## 101         0.06354191                  0.11722216  -0.10089394
## 215        -0.08260648                  0.08226328   0.05249700
## 323        -0.04888896                 -0.07444893  -0.06085177
## 421         0.11466521                 -0.08296182  -0.08066006
## 427        -0.08019116                 -0.01418384  -0.03450160
## 531        -0.08315222                 -0.06414787   0.09729552
## 560        -0.08521001                 -0.02461495  -0.04010798
## 608        -0.08521001                 -0.02461495  -0.04010798
# Put it all together
influential_obs.non <- outliers.non| sub.cook.non | sub.dfbetas.non
sum(influential_obs.non)
## [1] 8
# Identify influential observations
influential_obs.non.true <- influential_obs.non[influential_obs.non]
influential_obs.non.true
##  101  215  323  421  427  531  560  608 
## TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE

We performed a sensitivity analysis and our results did not change significantly except for dyspnea which became an independent predictor in T2DM patients, and the neutrophil count did so for non-T2DM patients. In addition, we identified 21 outliers and influential observations in T2DM patients and eight in non-T2DM patients. Although the number of the latter decreased (N=446).

Dimensional reduction

Principal Component Analysis (PCA) is an excellent technique for reducing data complexity before applying other analytical methods, such as linear regression, k-means clustering, and random forest classification. Integrating PCA with other methods enhances data analysis, model building, and interpretation, resulting in more accurate and efficient results. The choice between PCA and factor analysis depends on the objectives of the analysis.

Principal Component Analysis (PCA)

The PCA is a statistical technique that reduces the dimensionality of a dataset. This process transforms high-dimensional (complex or large) data sets into simpler (smaller) data sets while preserving important information. It helps to remove noise and redundancy from the data set, making the true patterns more pronounced and easier to analyze. With fewer dimensions, it’s possible to visually explore and understand complex data sets, focusing on the most relevant features. By identifying the principal components, it’s possible to focus on the most influential variables or individuals driving the trends and patterns in the data. This significantly reduces the complexity of interpreting data and makes it easier to explore and visualize. The output of PCA includes the variance explained by each principal component, which helps to understand the importance of each component in the analysis. This is crucial for making informed decisions based on the data. Furthermore, this process is crucial for more effectively analyzing data. The application of PCA can facilitate the acceleration of learning algorithms employed in data analysis, thereby enabling a more efficient and faster process. A reduction in data volume will result in a decrease in the storage and computational resources required. PCA is not merely a mathematical technique; it’s a strategic approach to dealing with data in the most efficient way possible.

📝NOTE: PCA only works with numerical values.

# Patients with T2DM
old_dim.dm <- PCA_data_select(data_match, t2dm, "yes")
# Patients without T2DM
old_dim.non <- PCA_data_select(data_match, t2dm, "no")

The first step is to utilize the prcomp() function from the stats package to perform PCA, with the parameters centered and scaled. This function is robust and offers a standardized method to perform PCA with ease, providing the user with the principal components, variance captured, and more. The next step is to examine the summary with the summary() or get_eigenvalue() functions. This will allow you to understand the proportion of variance represented in each principal component and determine which variables contribute the most to them. This information allows us to evaluate the importance and contribution of each principal component.

📝NOTE: The eigenvalues represent the variance explained by each principal component.

Explore the mathematical concepts of eigenvalues and eigenvectors. Eigenvalues measure the variance captured by each principal component. A higher eigenvalue indicates that the component accounts for a greater proportion of the variance in the data set. This allows for the determination of which principal components are worth keeping for analysis. Eigenvectors, on the other hand, define the direction of the principal components, which are formed by the combination of the original variables. In addition, eigenvectors are perpendicular (orthogonal) to each other, representing a new axis in data space that is aligned with the maximum variance. This orthogonal property ensures that the principal components are independent of each other. The eigenvalues and eigenvectors operate in tandem within PCA to transform and simplify the data set. By identifying new axes (eigenvectors) that maximize variance (eigenvalues), PCA allows the data to be viewed from a different perspective, focusing on the most informative aspects.

Perform PCA and analyze the eigenvalues/variances

# Scaled (standardization) for patients with T2DM
new_dim.dm <- stats::prcomp(old_dim.dm[1:14], scale. = TRUE, center = TRUE)
# Scaled (standardization) for patients without T2DM
new_dim.non <- stats::prcomp(old_dim.non[1:14], scale. = TRUE, center = TRUE)
# Extract the eigenvalues of the new dimensions of patients with T2DM
factoextra::get_eigenvalue(new_dim.dm) |>
  mutate(across(where(is.numeric), round, 2))
##        eigenvalue variance.percent cumulative.variance.percent
## Dim.1        3.20            22.85                       22.85
## Dim.2        1.71            12.24                       35.09
## Dim.3        1.61            11.51                       46.60
## Dim.4        1.39             9.92                       56.52
## Dim.5        1.19             8.47                       64.99
## Dim.6        1.02             7.29                       72.28
## Dim.7        0.81             5.79                       78.06
## Dim.8        0.77             5.47                       83.54
## Dim.9        0.71             5.04                       88.58
## Dim.10       0.64             4.59                       93.17
## Dim.11       0.35             2.51                       95.68
## Dim.12       0.31             2.25                       97.92
## Dim.13       0.25             1.81                       99.73
## Dim.14       0.04             0.27                      100.00
# Extract the eigenvalues of the new dimensions of patients without T2DM
factoextra::get_eigenvalue(new_dim.non) |>
  mutate(across(where(is.numeric), round, 2))
##        eigenvalue variance.percent cumulative.variance.percent
## Dim.1        3.09            22.04                       22.04
## Dim.2        1.72            12.26                       34.30
## Dim.3        1.51            10.78                       45.08
## Dim.4        1.41            10.05                       55.13
## Dim.5        1.24             8.82                       63.95
## Dim.6        1.03             7.38                       71.34
## Dim.7        0.89             6.36                       77.70
## Dim.8        0.78             5.58                       83.27
## Dim.9        0.71             5.05                       88.32
## Dim.10       0.54             3.85                       92.17
## Dim.11       0.44             3.16                       95.33
## Dim.12       0.37             2.66                       97.99
## Dim.13       0.27             1.94                       99.92
## Dim.14       0.01             0.08                      100.00

To visualize the PCA, the ggplot2 and factoextra packages can be utilized. Plots such as scree plots can be used to understand the variance explained (eigenvalues) by each component or biplots to understand the relationship between variables and components and how the data is distributed across these new dimensions (principal components). This helps to reveal patterns and clusters. A scree plot assesses the contribution of each component to the total variance. It determines the optimal number of principal components to keep, streamlines the analysis by focusing on the most important components.

# Plot variances of new dimensions of patients with T2DM
fviz_eig(
  new_dim.dm,
  addlabels = TRUE,
  choice = "variance",
  barfill = "#99CC00FF",
  barcolor = "#99CC00FF",
  xlab = "New dimensions of patients with T2DM"
)

# Plot variances of new dimensions of patients without T2DM
fviz_eig(
  new_dim.non,
  addlabels = TRUE,
  choice = "variance",
  barfill = "#99CC00FF",
  barcolor = "#99CC00FF",
  xlab = "New dimensions of patients without T2DM"
)  

Variables for patients with T2DM

# Extract the results for the variables
var.dm <- get_pca_var(new_dim.dm)

# Coordinates for the variables
head(var.dm$coord, 5)
##                      Dim.1       Dim.2      Dim.3      Dim.4       Dim.5
## Age              0.1310413 -0.01538093 0.00304282 -0.7415010 -0.14427367
## Respiratory rate 0.2954787 -0.34578847 0.50770851 -0.0948328  0.25374829
## Heart rate       0.3849381 -0.26256312 0.23942025  0.3361526  0.07269603
## SBP              0.3303417  0.77340955 0.22768075 -0.1919476  0.18845534
## DBP              0.2888826  0.77660119 0.16813823 -0.1544252  0.23536017
##                       Dim.6       Dim.7       Dim.8       Dim.9       Dim.10
## Age              -0.1274996 -0.24030905 -0.38728046 -0.16105519  0.396914774
## Respiratory rate  0.1140370 -0.18586860 -0.39933734 -0.02944328 -0.492540565
## Heart rate        0.4238345  0.08871048  0.05635499 -0.60335699  0.223770634
## SBP               0.0879648  0.02246661 -0.08489628 -0.09473227 -0.073900616
## DBP               0.1720099  0.19300210  0.10807207  0.03617212 -0.002223504
##                        Dim.11      Dim.12      Dim.13       Dim.14
## Age              -0.038605580 -0.02024160 -0.05066146  0.002485894
## Respiratory rate -0.006165313 -0.08637889 -0.06311392  0.002371376
## Heart rate       -0.008953320  0.04499895 -0.01902602 -0.004096643
## SBP               0.205997305  0.12559816  0.28354627 -0.004531681
## DBP              -0.200873126 -0.08628742 -0.27978990  0.004162160
# Correlations between variables and dimensions
head(var.dm$cor, 5)
##                      Dim.1       Dim.2      Dim.3      Dim.4       Dim.5
## Age              0.1310413 -0.01538093 0.00304282 -0.7415010 -0.14427367
## Respiratory rate 0.2954787 -0.34578847 0.50770851 -0.0948328  0.25374829
## Heart rate       0.3849381 -0.26256312 0.23942025  0.3361526  0.07269603
## SBP              0.3303417  0.77340955 0.22768075 -0.1919476  0.18845534
## DBP              0.2888826  0.77660119 0.16813823 -0.1544252  0.23536017
##                       Dim.6       Dim.7       Dim.8       Dim.9       Dim.10
## Age              -0.1274996 -0.24030905 -0.38728046 -0.16105519  0.396914774
## Respiratory rate  0.1140370 -0.18586860 -0.39933734 -0.02944328 -0.492540565
## Heart rate        0.4238345  0.08871048  0.05635499 -0.60335699  0.223770634
## SBP               0.0879648  0.02246661 -0.08489628 -0.09473227 -0.073900616
## DBP               0.1720099  0.19300210  0.10807207  0.03617212 -0.002223504
##                        Dim.11      Dim.12      Dim.13       Dim.14
## Age              -0.038605580 -0.02024160 -0.05066146  0.002485894
## Respiratory rate -0.006165313 -0.08637889 -0.06311392  0.002371376
## Heart rate       -0.008953320  0.04499895 -0.01902602 -0.004096643
## SBP               0.205997305  0.12559816  0.28354627 -0.004531681
## DBP              -0.200873126 -0.08628742 -0.27978990  0.004162160
# Quality of representation (Cos2) for the  variables on the dimensions
head(var.dm$cos2, 5)
##                       Dim.1       Dim.2        Dim.3      Dim.4       Dim.5
## Age              0.01717182 0.000236573 9.258754e-06 0.54982373 0.020814891
## Respiratory rate 0.08730765 0.119569665 2.577679e-01 0.00899326 0.064388192
## Heart rate       0.14817737 0.068939392 5.732205e-02 0.11299860 0.005284713
## SBP              0.10912563 0.598162327 5.183852e-02 0.03684386 0.035515416
## DBP              0.08345316 0.603109414 2.827047e-02 0.02384714 0.055394407
##                        Dim.6        Dim.7       Dim.8       Dim.9       Dim.10
## Age              0.016256136 0.0577484372 0.149986158 0.025938774 1.575413e-01
## Respiratory rate 0.013004429 0.0345471363 0.159470313 0.000866907 2.425962e-01
## Heart rate       0.179635649 0.0078695489 0.003175885 0.364039661 5.007330e-02
## SBP              0.007737806 0.0005047487 0.007207378 0.008974202 5.461301e-03
## DBP              0.029587420 0.0372498109 0.011679573 0.001308422 4.943969e-06
##                        Dim.11       Dim.12       Dim.13       Dim.14
## Age              1.490391e-03 0.0004097223 0.0025665834 6.179669e-06
## Respiratory rate 3.801109e-05 0.0074613121 0.0039833672 5.623423e-06
## Heart rate       8.016193e-05 0.0020249051 0.0003619894 1.678249e-05
## SBP              4.243489e-02 0.0157748981 0.0803984862 2.053613e-05
## DBP              4.035001e-02 0.0074455184 0.0782823867 1.732358e-05
# Correlation matrix - Cos2 for the variables
my_ggcorrplor(var.dm$cos2)

# Cos2 plot for the variables on the dimensions 1 and 2
fviz_cos2(new_dim.dm, choice = "var", axes = 1:2)

# Plot of 10 variables with highest cos2 on PC1 and PC2
a <- fviz_cos2(new_dim.dm,
               choice = "var",
               axes = 1:2,
               top = 10)

# Plot of 10 variables with highest cos2 on PC3 and PC4
b <- fviz_cos2(new_dim.dm,
               choice = "var",
               axes = 3:4,
               top = 10)

# Correlations circle and color by cos2 values
c <- fviz_pca_var(
  new_dim.dm,
  col.var.dm = "cos2",
  gradient.cols = c("#fee0d2", "#fc9272", "#de2d26"),
  repel = TRUE
) # repel = TRUE avoid text overlapping
# Contributions of the variables on the dimensions
head(var.dm$contrib, 5)
##                      Dim.1       Dim.2        Dim.3      Dim.4     Dim.5
## Age              0.5368976  0.01380412 5.743529e-04 39.6061002 1.7550924
## Respiratory rate 2.7297779  6.97693390 1.599025e+01  0.6478221 5.4291528
## Heart rate       4.6329425  4.02263885 3.555887e+00  8.1397609 0.4456021
## SBP              3.4119431 34.90299165 3.215725e+00  2.6540173 2.9946271
## DBP              2.6092628 35.19165600 1.753716e+00  1.7178093 4.6708052
##                       Dim.6      Dim.7      Dim.8      Dim.9       Dim.10
## Age               1.5932046 7.12606349 19.5762134  3.6737993 2.453262e+01
## Respiratory rate  1.2745166 4.26306059 20.8140865  0.1227831 3.777752e+01
## Heart rate       17.6054342 0.97108958  0.4145169 51.5602104 7.797504e+00
## SBP               0.7583541 0.06228517  0.9407080  1.2710476 8.504437e-01
## DBP               2.8997550 4.59656625  1.5244194  0.1853165 7.698838e-04
##                       Dim.11    Dim.12     Dim.13     Dim.14
## Age               0.42409231 0.1303262  1.0148704 0.01634018
## Respiratory rate  0.01081610 2.3733251  1.5750906 0.01486936
## Heart rate        0.02281017 0.6440902  0.1431367 0.04437597
## SBP              12.07489365 5.0177450 31.7909176 0.05430131
## DBP              11.48163972 2.3683014 30.9541762 0.04580672
# Correlation matrix - contributions of the variables
corrplot(var.dm$contrib, is.corr = FALSE)

# Contribution plot of the top 10 variables on PC1
fviz_contrib(new_dim.dm,
             choice = "var",
             axes = 1,
             top = 10)

# Contribution plot of the top 10 variables on PC2
fviz_contrib(new_dim.dm,
             choice = "var",
             axes = 2,
             top = 10)

# Plot of 10 variables with the highest contribution on PC1 and PC2
d <- fviz_contrib(new_dim.dm,
                  choice = "var",
                  axes = 1:2,
                  top = 10)

# Plot of 10 variables with the highest contribution on PC3 and PC4
e <- fviz_contrib(new_dim.dm,
                  choice = "var",
                  axes = 3:4,
                  top = 10)

# Correlations circle and color by contributions values
f <- fviz_pca_var(
  new_dim.dm,
  col.var.dm = "contrib",
  gradient.cols = c("#fee0d2", "#fc9272", "#de2d26"),
  repel = TRUE
)
# Arrange multiple plots
pca_dm <- ggpubr::ggarrange(a, d, b, e, c, f,
                             ncol = 2, nrow = 3,
                             labels = c("A)", "B)", "C)", "D)", "E)", "F)"),
                             legend = "right")

annotate_figure(pca_dm,
                top = text_grob("PCA of patients with T2DM",
                                face = "bold",
                                size = 16))

Variables for patients without T2DM

# Extract the results for the variables
var.non <- get_pca_var(new_dim.non)

# Coordinates for the variables
head(var.non$coord, 5)
##                       Dim.1       Dim.2      Dim.3        Dim.4       Dim.5
## Age              0.21518444 -0.05215009 -0.3965975 -0.423221400  0.17663250
## Respiratory rate 0.37273416 -0.22526402  0.1897202 -0.550278994 -0.04219734
## Heart rate       0.23062262 -0.08539973  0.2205749 -0.289395089 -0.45069372
## SBP              0.04075027 -0.70432079  0.3827636  0.008191438  0.33822831
## DBP              0.05408796 -0.69680536  0.3659407 -0.003278418  0.34429609
##                        Dim.6        Dim.7       Dim.8       Dim.9        Dim.10
## Age               0.35972439 -0.239147620 -0.58464744  0.12237971 -0.1785602443
## Respiratory rate  0.05555191 -0.227518906  0.37215177 -0.39393213 -0.3427562249
## Heart rate       -0.20617946  0.665947029 -0.24812384  0.06760363 -0.2247425889
## SBP               0.10198745 -0.008355876 -0.10135205  0.05126318  0.0003236049
## DBP              -0.15117820  0.070983699 -0.03092006  0.12654648  0.1251027425
##                       Dim.11      Dim.12       Dim.13        Dim.14
## Age              -0.08450294  0.02872303 -0.011722024 -0.0012128370
## Respiratory rate -0.05873013 -0.01564605 -0.006149518  0.0001438574
## Heart rate        0.02018809 -0.02288268 -0.020515933  0.0002057273
## SBP               0.46516400 -0.02718829  0.028438278 -0.0010651979
## DBP              -0.44237664  0.04154781 -0.033704799  0.0001729203
# Correlations between variables and dimensions
head(var.non$cor, 5)
##                       Dim.1       Dim.2      Dim.3        Dim.4       Dim.5
## Age              0.21518444 -0.05215009 -0.3965975 -0.423221400  0.17663250
## Respiratory rate 0.37273416 -0.22526402  0.1897202 -0.550278994 -0.04219734
## Heart rate       0.23062262 -0.08539973  0.2205749 -0.289395089 -0.45069372
## SBP              0.04075027 -0.70432079  0.3827636  0.008191438  0.33822831
## DBP              0.05408796 -0.69680536  0.3659407 -0.003278418  0.34429609
##                        Dim.6        Dim.7       Dim.8       Dim.9        Dim.10
## Age               0.35972439 -0.239147620 -0.58464744  0.12237971 -0.1785602443
## Respiratory rate  0.05555191 -0.227518906  0.37215177 -0.39393213 -0.3427562249
## Heart rate       -0.20617946  0.665947029 -0.24812384  0.06760363 -0.2247425889
## SBP               0.10198745 -0.008355876 -0.10135205  0.05126318  0.0003236049
## DBP              -0.15117820  0.070983699 -0.03092006  0.12654648  0.1251027425
##                       Dim.11      Dim.12       Dim.13        Dim.14
## Age              -0.08450294  0.02872303 -0.011722024 -0.0012128370
## Respiratory rate -0.05873013 -0.01564605 -0.006149518  0.0001438574
## Heart rate        0.02018809 -0.02288268 -0.020515933  0.0002057273
## SBP               0.46516400 -0.02718829  0.028438278 -0.0010651979
## DBP              -0.44237664  0.04154781 -0.033704799  0.0001729203
# Quality of representation (Cos2) for the  variables on the dimensions
head(var.non$cos2, 5)
##                        Dim.1       Dim.2      Dim.3        Dim.4       Dim.5
## Age              0.046304345 0.002719632 0.15728958 1.791164e-01 0.031199042
## Respiratory rate 0.138930757 0.050743880 0.03599375 3.028070e-01 0.001780616
## Heart rate       0.053186792 0.007293113 0.04865329 8.374952e-02 0.203124826
## SBP              0.001660585 0.496067778 0.14650801 6.709966e-05 0.114398391
## DBP              0.002925508 0.485537703 0.13391259 1.074803e-05 0.118539801
##                        Dim.6        Dim.7      Dim.8       Dim.9       Dim.10
## Age              0.129401636 5.719158e-02 0.34181263 0.014976793 3.188376e-02
## Respiratory rate 0.003086015 5.176485e-02 0.13849694 0.155182527 1.174818e-01
## Heart rate       0.042509972 4.434854e-01 0.06156544 0.004570251 5.050923e-02
## SBP              0.010401439 6.982067e-05 0.01027224 0.002627914 1.047201e-07
## DBP              0.022854847 5.038685e-03 0.00095605 0.016014012 1.565070e-02
##                        Dim.11       Dim.12       Dim.13       Dim.14
## Age              0.0071407465 0.0008250126 1.374059e-04 1.470974e-06
## Respiratory rate 0.0034492277 0.0002447989 3.781657e-05 2.069495e-08
## Heart rate       0.0004075591 0.0005236169 4.209035e-04 4.232373e-08
## SBP              0.2163775487 0.0007392029 8.087357e-04 1.134647e-06
## DBP              0.1956970933 0.0017262206 1.136013e-03 2.990143e-08
# Correlation matrix - Cos2 for the variables
my_ggcorrplor(var.non$cos2)

# Cos2 plot for the variables on dimensions 1 and 2
fviz_cos2(new_dim.non, choice = "var", axes = 1:2)

# Plot of 10 variables with highest cos2 on PC1 and PC2
a <- fviz_cos2(new_dim.non,
               choice = "var",
               axes = 1:2,
               top = 10)

# Plot of 10 variables with highest cos2 on PC3 and PC4
b <- fviz_cos2(new_dim.non,
               choice = "var",
               axes = 3:4,
               top = 10)

# Correlations circle and color by cos2 values
c <- fviz_pca_var(
  new_dim.non,
  col.var.non = "cos2",
  gradient.cols = c("#fee0d2", "#fc9272", "#de2d26"),
  repel = TRUE
) # repel = TRUE avoid text overlapping
# Contributions of the variables on the dimensions
head(var.non$contrib, 5)
##                       Dim.1      Dim.2     Dim.3        Dim.4      Dim.5
## Age              1.50077200  0.1584216 10.419284 1.273310e+01  2.5260573
## Respiratory rate 4.50289040  2.9558867  2.384322 2.152607e+01  0.1441691
## Heart rate       1.72383928  0.4248318  3.222925 5.953621e+00 16.4461764
## SBP              0.05382127 28.8964926  9.705084 4.770009e-03  9.2623642
## DBP              0.09481875 28.2831041  8.870730 7.640603e-04  9.5976770
##                       Dim.6        Dim.7      Dim.8      Dim.9       Dim.10
## Age              12.5174970  6.424043143 43.7811287  2.1188892 5.921892e+00
## Respiratory rate  0.2985216  5.814485689 17.7394041 21.9549388 2.182035e+01
## Heart rate        4.1121462 49.814490779  7.8856198  0.6465907 9.381271e+00
## SBP               1.0061695  0.007842604  1.3157212  0.3717924 1.945006e-05
## DBP               2.2108335  0.565970214  0.1224558  2.2656330 2.906863e+00
##                       Dim.11     Dim.12     Dim.13       Dim.14
## Age               1.61271902 0.22184361 0.05068201 0.0136711691
## Respiratory rate  0.77899911 0.06582574 0.01394860 0.0001923381
## Heart rate        0.09204616 0.14079914 0.15524984 0.0003933550
## SBP              48.86830648 0.19876961 0.29830136 0.0105453593
## DBP              44.19767944 0.46417593 0.41901746 0.0002779027
# Correlation matrix - contributions of the variables
corrplot(var.non$contrib, is.corr = FALSE)

# Contribution plot of the top 10 variables on PC1
fviz_contrib(new_dim.non,
             choice = "var",
             axes = 1,
             top = 10)

# Contribution plot of the top 10 variables on PC2
fviz_contrib(new_dim.non,
             choice = "var",
             axes = 2,
             top = 10)

# Plot of 10 variables with the highest contribution on PC1 and PC2
d <- fviz_contrib(new_dim.non,
                  choice = "var",
                  axes = 1:2,
                  top = 10)

# Plot of 10 variables with the highest contribution on PC3 and PC4
e <- fviz_contrib(new_dim.non,
                  choice = "var",
                  axes = 3:4,
                  top = 10)

# Correlations circle and color by contributions values
f <- fviz_pca_var(
  new_dim.dm,
  col.var.dm = "contrib",
  gradient.cols = c("#fee0d2", "#fc9272", "#de2d26"),
  repel = TRUE
)
# Arrange multiple plots
pca_non <- ggpubr::ggarrange(a, d, b, e, c, f,
                             ncol = 2, nrow = 3,
                             labels = c("A)", "B)", "C)", "D)", "E)", "F)"),
                             legend = "right")

annotate_figure(pca_non,
                top = text_grob("PCA of patients without T2DM",
                                face = "bold",
                                size = 16))

Individuals with T2M

# Extract results for the individuals
ind <- get_pca_ind(new_dim.dm)

# Coordinates for the individuals
head(ind$coord, 5)

# Cos2 of the individuals on dimensions
head(ind$cos2, 5)
# Plot of 30 individuals with the highest cos2 on PC1 and PC2
a <- fviz_cos2(new_dim.dm,
               choice = "ind",
               axes = 1:2,
               top = 30)

# Plot of 30 individuals with the highest cos2 on PC3 and PC4
b <- fviz_cos2(new_dim.dm,
               choice = "ind",
               axes = 3:4,
               top = 30)

# Correlations circle and color by cos2 values
c <- fviz_pca_ind(
  new_dim.dm,
  col.ind = "cos2",
  gradient.cols = c("#fee0d2", "#fc9272", "#de2d26"),
  repel = TRUE
)
# Contributions of the individuals on dimensions
head(ind$contrib, 5)
##          Dim.1     Dim.2      Dim.3       Dim.4       Dim.5     Dim.6     Dim.7
## 2  0.008115689 0.9973682 0.20823312 0.015720403 0.035318499 0.3470700 0.6440874
## 4  5.102168432 2.1202962 0.28441438 0.446195218 1.083264012 0.1280857 0.0165201
## 8  0.160499125 0.1935381 0.72458754 0.003721498 0.080764308 0.7928768 0.1422958
## 9  0.011712313 0.3235136 0.13191277 0.065136614 0.711285583 0.0156119 0.0155242
## 10 0.009588282 1.7673649 0.01239926 1.672997334 0.009233754 0.5124959 0.1848037
##         Dim.8      Dim.9       Dim.10     Dim.11       Dim.12     Dim.13
## 2  1.00223043 0.56958998 2.124423e-01 0.01971417 0.0125629046 0.22318489
## 4  0.01516423 0.21271390 2.660413e-01 6.99100938 3.6475741886 0.65463610
## 8  0.18695931 1.32138253 4.074472e-01 0.08991412 0.0008041594 1.03485716
## 9  0.05792905 0.01473957 2.465633e-06 0.05111502 0.3735253994 0.02160703
## 10 2.33851280 0.82426165 1.121999e+00 0.66397414 0.2600294586 0.42662454
##        Dim.14
## 2  0.06909645
## 4  0.07629944
## 8  0.01118992
## 9  0.04103263
## 10 0.01461034
# Plot of 30 individuals with the highest contribution on PC1 and PC2
d <- fviz_contrib(new_dim.dm,
                  choice = "ind",
                  axes = 1:2,
                  top = 30)

# Plot of 30 individuals with the highest contribution on PC3 and PC4
e <- fviz_contrib(new_dim.dm,
                  choice = "ind",
                  axes = 3:4,
                  top = 30)

# Correlations circle and color by contributions values
f <- fviz_pca_ind(
  new_dim.dm,
  col.ind = "contrib",
  gradient.cols = c("#fee0d2", "#fc9272", "#de2d26")
)
# Arrange multiple plots
pca_ind.dm <- ggpubr::ggarrange(a, d, b, e, c, f, 
                                ncol = 2, nrow = 3, 
                                labels = c("A)", "B)", "C)", "D)", "E)", "F)"), 
                                legend = "right")

annotate_figure(pca_ind.dm,
                top = text_grob("Patients with T2DM",
                                face = "bold",
                                size = 16))

# Plot of individuals and color by outcome on PC1 and PC2
ind.graph <- fviz_pca_ind(
  new_dim.dm,
  axes = 1:2,
  geom.ind = "point",
  col.ind = old_dim.dm$outcome,
  palette = "igv",
  addEllipses = TRUE,
  mean.point = FALSE,
  ggtheme = theme_pubr()
)

a <- ggpubr::ggpar(
  ind.graph,
  title = element_blank(),
  xlab = "PC1 (22.8%)",
  ylab = "PC2 (12.2%)",
  legend.title = "Group",
  legend = "right",
  font.legend = c(12, "black")
  )

# Plot of individuals and color by outcome on PC3 and PC4
ind.graph <- fviz_pca_ind(
  new_dim.dm,
  axes = 3:4,
  geom.ind = "point",
  col.ind = old_dim.dm$outcome,
  palette = "igv",
  addEllipses = TRUE,
  mean.point = FALSE,
  ggtheme = theme_pubr()
  )

b <- ggpubr::ggpar(
  ind.graph,
  title = element_blank(),
  xlab = "PC3 (11.5%)",
  ylab = "PC2 (9.9%)",
  legend.title = "Group",
  legend = "right",
  font.legend = c(12, "black")
  )
# Arrange multiple plots
ggpubr::ggarrange(
  a, b,
  ncol = 2, nrow = 1,
  labels = c("A)", "B)"),
  legend = "bottom",
  common.legend = TRUE
  )

Individuals without T2M

# Extract results for the individuals
ind <- get_pca_ind(new_dim.non)

# Coordinates for the individuals
head(ind$coord, 5)

# Cos2 of the individuals on dimensions
head(ind$cos2, 5)
# Plot of 30 individuals with the highest cos2 on PC1 and PC2
a <- fviz_cos2(new_dim.non,
               choice = "ind",
               axes = 1:2,
               top = 30)

# Plot of 30 individuals with the highest cos2 on PC3 and PC4
b <- fviz_cos2(new_dim.non,
               choice = "ind",
               axes = 3:4,
               top = 30)

# Correlations circle and color by cos2 values
c <- fviz_pca_ind(
  new_dim.non,
  col.ind = "cos2",
  gradient.cols = c("#fee0d2", "#fc9272", "#de2d26"),
  repel = TRUE
)
# Contributions of the individuals on dimensions
head(ind$contrib, 5)
##        Dim.1      Dim.2        Dim.3       Dim.4      Dim.5        Dim.6
## 3 0.31820650 0.03360899 3.415502e-02 0.110144218 0.72321743 0.0009604812
## 4 0.03247121 0.14439008 8.899058e-02 0.008490317 0.03452047 1.0023889927
## 6 0.07299621 0.62331203 1.433914e+00 0.643800227 2.03238515 0.1096727429
## 7 0.20055316 0.14661546 6.676089e-06 0.222460643 0.05654110 0.3964986952
## 9 0.73515872 0.31358863 6.139603e-02 0.140682105 0.27036549 0.0014156844
##        Dim.7       Dim.8      Dim.9     Dim.10     Dim.11     Dim.12     Dim.13
## 3 0.03825240 0.524727655 0.09250350 0.20934246 0.16837328 0.28656841 0.00146326
## 4 0.02188522 0.021976428 0.13239347 0.05433064 0.16676559 0.06015494 0.01079682
## 6 5.00297922 7.244204346 3.45374066 3.12933724 0.14764994 0.12384846 0.05071263
## 7 0.25704706 0.010915657 0.01383967 0.07236198 0.28932074 0.01652950 0.01332955
## 9 0.18526553 0.008364293 0.02247804 0.00103952 0.05941301 0.29211612 0.05942710
##         Dim.14
## 3 3.541570e-04
## 4 7.625939e-02
## 6 3.571723e-02
## 7 1.076086e-01
## 9 3.720626e-06
# Plot of 30 individuals with the highest contribution on PC1 and PC2
d <- fviz_contrib(new_dim.non,
                  choice = "ind",
                  axes = 1:2,
                  top = 30)

# Plot of 30 individuals with the highest contribution on PC3 and PC4
e <- fviz_contrib(new_dim.non,
                  choice = "ind",
                  axes = 3:4,
                  top = 30)

# Correlations circle and color by contributions values
f <- fviz_pca_ind(
  new_dim.non,
  col.ind = "contrib",
  gradient.cols = c("#fee0d2", "#fc9272", "#de2d26")
)
# Arrange multiple plots
pca_ind.non <- ggpubr::ggarrange(a, d, b, e, c, f, 
                                ncol = 2, nrow = 3, 
                                labels = c("A)", "B)", "C)", "D)", "E)", "F)"), 
                                legend = "right")

annotate_figure(pca_ind.non,
                top = text_grob("Patients without T2DM",
                                face = "bold",
                                size = 16))

# Plot of individuals and color by outcome on PC1 and PC2
ind.graph <- fviz_pca_ind(
  new_dim.non,
  axes = 1:2,
  geom.ind = "point",
  col.ind = old_dim.non$outcome,
  palette = "igv",
  addEllipses = TRUE,
  mean.point = FALSE,
  ggtheme = theme_pubr()
)

a <- ggpubr::ggpar(
  ind.graph,
  title = element_blank(),
  xlab = "PC1 (22%)",
  ylab = "PC2 (12.3%)",
  legend.title = "Group",
  legend = "right",
  font.legend = c(12, "black")
  )

# Plot of individuals and color by outcome on PC3 and PC4
ind.graph <- fviz_pca_ind(
  new_dim.non,
  axes = 3:4,
  geom.ind = "point",
  col.ind = old_dim.non$outcome,
  palette = "igv",
  addEllipses = TRUE,
  mean.point = FALSE,
  ggtheme = theme_pubr()
  )

b <- ggpubr::ggpar(
  ind.graph,
  title = element_blank(),
  xlab = "PC3 (10.8%)",
  ylab = "PC2 (10%)",
  legend.title = "Group",
  legend = "right",
  font.legend = c(12, "black")
  )
# Arrange multiple plots
ggpubr::ggarrange(
  a, b,
  ncol = 2, nrow = 1,
  labels = c("A)", "B)"),
  legend = "bottom",
  common.legend = TRUE
)

Biplot of individuals and variables of patients with T2DM

# Contributions on PC1 and PC2
a.dm <- fviz_pca_biplot(
  new_dim.dm,
  # Dimensions 1 and 2
  axes = 1:2,
  # Individuals
  col.ind = old_dim.dm$outcome,
  geom.ind = "point",
  col.var = "black",
  # Top 10 contributing variables
  geom.var = "text",
  select.var = list(contrib = 10),
  # Theme
  palette = "Set1",
  addEllipses = TRUE,
  mean.point = FALSE,
  repel = TRUE,
  ggtheme = theme_pubr()
  )

# Plot parameters
a1.dm <- ggpubr::ggpar(
  a.dm,
  title = element_blank(),
  xlab = "PC1 (22.8%)",
  ylab = "PC2 (12.2%)",
  legend.title = "Group",
  font.legend = c(12, "black")
  )

# Contributions on PC3 and PC4
b.dm <- fviz_pca_biplot(
  new_dim.dm,
  # Dimensions 3 and 4
  axes = 3:4,
  # Individuals
  col.ind = old_dim.dm$outcome,
  geom.ind = "point",
  col.var = "black",
  # Top 10 contributing variables
  geom.var = "text",
  select.var = list(contrib = 10),
  # Theme
  palette = "Set1",
  addEllipses = TRUE,
  mean.point = FALSE,
  repel = TRUE,
  ggtheme = theme_pubr()
  )

# Plot parameters
b1.dm <- ggpubr::ggpar(
  b.dm,
  title = element_blank(),
  xlab = "PC3 (11.5%)",
  ylab = "PC4 (9.9%)",
  legend.title = "Group",
  font.legend = c(12, "black")
  )
# Arrange multiple plots
F1_dm <- ggpubr::ggarrange(
  a1.dm, b1.dm,
  ncol = 2, nrow = 1,
  labels = c("A)", "B)"),
  legend = "bottom",
  common.legend = TRUE
)

# View
F1_dm

Biplot of individuals and variables of patients without T2DM

# Contributions on PC1 and PC2
a.non <- fviz_pca_biplot(
  new_dim.non,
  # Dimensions 1 and 2
  axes = 1:2,
  # Individuals
  col.ind = old_dim.non$outcome,
  geom.ind = "point",
  col.var = "black",
  # Top 10 contributing variables
  geom.var = "text",
  select.var = list(contrib = 10),
  # Theme
  palette = "Set1",
  addEllipses = TRUE,
  mean.point = FALSE,
  repel = TRUE,
  ggtheme = theme_pubr()
)

# Plot parameters
a1.non <- ggpubr::ggpar(
  a.non,
  title = element_blank(),
  xlab = "PC1 (22%)",
  ylab = "PC2 (12.3%)",
  legend.title = "Group",
  font.legend = c(12, "black")
)

# Contributions on PC3 and PC4
b.non <- fviz_pca_biplot(
  new_dim.non,
  # Dimensions 3 and 4
  axes = 3:4,
  # Individuals
  col.ind = old_dim.non$outcome,
  geom.ind = "point",
  col.var = "black",
  # Top 10 contributing variables
  geom.var = "text",
  select.var = list(contrib = 10),
  # Theme
  palette = "Set1",
  addEllipses = TRUE,
  mean.point = FALSE,
  repel = TRUE,
  ggtheme = theme_pubr()
)

# Plot parameters
b1.non <- ggpubr::ggpar(
  b.non,
  title = element_blank(),
  xlab = "PC3 (10.8%)",
  ylab = "PC4 (10%)",
  legend.title = "Group",
  font.legend = c(12, "black")
)
# Arrange multiple plots
F1_non <- ggpubr::ggarrange(
  a1.non, b1.non,
  ncol = 2, nrow = 1,
  labels = c("A)", "B)"),
  legend = "bottom",
  common.legend = TRUE
)

# View
F1_non

# Arrange multiple plots
FS2 <- ggpubr::ggarrange(
  a1.dm, b1.dm, a1.non, b1.non,
  ncol = 2, nrow = 2,
  labels = c("A)", "B)", "C)", "D)"),
  legend = "bottom",
  common.legend = TRUE
)

# View
FS2

The PCA has limitations, including sensitivity to the scale of variables. It’s essential to standardize variables to have unit variance before conducting PCA to mitigate this issue. PCA also assumes linearity; if variables do not exhibit linear relationships, alternative dimensionality reduction methods like t-SNE or UMAP may be more suitable for non-linear data structures. The principal components (new dimensions) created by PCA are linear combinations of the original variables (original dimensions) and may lack direct interpretability, necessitating careful evaluation. Dimension reduction through PCA inevitably leads to some information loss. While the discarded components are less significant, they may still hold valuable insights. Recognizing these limitations is crucial as it does not lessen PCA’s value but rather enhances its usefulness in guiding its application. It’s recommended to perform PCA before K-means clustering because speeds up the clustering process and optimizes computational resources, especially on large datasets. The PCA optimizes clustering by reducing noise and redundant information, which helps K-means clustering focus on relevant features, leading to more accurate cluster assignments. Dimension reduction captures important information into few dimensions, this reduces the number of features and preserves most of the variation, making subsequent clustering more efficient and improves interpretation.

t-Distributed Stochastic Neighbor Embedding (t-SNE)

t-SNE of patients with T2DM

tsne_data.dm <- old_dim.dm |>
  dplyr::mutate(ID = row_number())

meta_numerical <- tsne_data.dm |>
  dplyr::select(ID, outcome)

tSNE_fit <- tsne_data.dm |>
  dplyr::select(where(is.numeric)) |>
  scale() |>
  Rtsne()

tSNE_df <- tSNE_fit$Y |>
  as.data.frame() |>
  rename(tSNE1 = "V1",
         tSNE2 = "V2") |>
  mutate(ID = row_number())

tSNE_df <- tSNE_df |>
  inner_join(meta_numerical, by = "ID")

tSNE_df |>
  ggplot(aes(x = tSNE1,
             y = tSNE2,
             color = outcome)) +
  geom_point() +
  scale_color_igv() +
  labs(color = "Group") +
  theme_minimal() +
  theme(legend.position = "right",
        axis.line = element_line(color = "black"))

t-SNE of patients without T2DM

tsne_data.non <- old_dim.non |>
  dplyr::mutate(ID = row_number())

meta_numerical <- tsne_data.non |>
  dplyr::select(ID, outcome)

tSNE_fit <- tsne_data.non |>
  dplyr::select(where(is.numeric)) |>
  scale() |>
  Rtsne()

tSNE_df <- tSNE_fit$Y |>
  as.data.frame() |>
  rename(tSNE1 = "V1",
         tSNE2 = "V2") |>
  mutate(ID = row_number())

tSNE_df <- tSNE_df |>
  inner_join(meta_numerical, by = "ID")

tSNE_df |>
  ggplot(aes(x = tSNE1,
             y = tSNE2,
             color = outcome)) +
  geom_point() +
  scale_color_igv() +
  labs(color = "Group") +
  theme_minimal() +
  theme(legend.position = "right",
        axis.line = element_line(color = "black"))

Save outputs

All distances are in inches

outputs_dir <- "/Academic/COVID-19 proyects/COVID-19_DMII_ICA/COVID-19_ICA_CC/outputs/"
sect_properties <- prop_section(
  type = "continuous",
  page_margins = page_mar(bottom = 1, top = 1, right = 1, left = 1))

Tables

# Save tables
save_as_docx(
  flex_table_1,
  path = stringr::str_glue(outputs_dir, "Table_1.docx"),
  align = "center")

save_as_docx(
  flex_table_2,
  path = stringr::str_glue(outputs_dir, "Table_2.docx"),
  align = "center")

save_as_docx(
  flex_table_3,
  path = stringr::str_glue(outputs_dir, "Table_3.docx"),
  align = "center")

save_as_docx(
  flex_table_4,
  path = stringr::str_glue(outputs_dir, "Table_4.docx"),
  align = "center")

save_as_docx(
  flex_table_5,
  path = stringr::str_glue(outputs_dir, "Table_5.docx"),
  align = "center")

save_as_docx(
  flex_table_s1,
  path = stringr::str_glue(outputs_dir, "Table_s1.docx"),
  align = "center")

save_as_docx(
  flex_table_s2,
  path = stringr::str_glue(outputs_dir, "Table_s2.docx"),
  align = "center")

Figures

# Save supplementary figure 1 (EPS)
ggsave(
  plot = FS1,
  filename = stringr::str_glue(outputs_dir, "FIG_S1.eps"),
  width = 12,
  height = 12,
  units = "in")

# Save supplementary figure 1 (PNG)
ggsave(
  plot = FS1,
  filename = stringr::str_glue(outputs_dir, "FIG_S1.png"),
  width = 12,
  height = 12,
  dpi = 300,
  units = "in")

# Save supplementary figure 1 (JPEG)
ggsave(
  plot = FS1,
  filename = stringr::str_glue(outputs_dir, "FIG_S1.jpeg"),
  width = 12,
  height = 12,
  dpi = 500,
  units = "in")

# Save supplementary figure 2 (EPS)
ggsave(
  plot = F1_dm,
  filename = stringr::str_glue(outputs_dir, "FIG_S2.eps"),
  width = 14,
  height = 6,
  units = "in")

# Save supplementary figure 2 (PNG)
ggsave(
  plot = F1_dm,
  filename = stringr::str_glue(outputs_dir, "FIG_S2.png"),
  width = 14,
  height = 6,
  dpi = 500,
  units = "in")

# Save supplementary figure 2 (JPEG)
ggsave(
  plot = F1_dm,
  filename = stringr::str_glue(outputs_dir, "FIG_S2.jpeg"),
  width = 14,
  height = 6,
  dpi = 500,
  units = "in")

# Save supplementary figure 3 (EPS)
ggsave(
  plot = F1_non,
  filename = stringr::str_glue(outputs_dir, "FIG_S3.eps"),
  width = 14,
  height = 6,
  units = "in")

# Save supplementary figure 3 (PNG)
ggsave(
  plot = F1_non,
  filename = stringr::str_glue(outputs_dir, "FIG_S3.png"),
  width = 14,
  height = 6,
  dpi = 500,
  units = "in")

# Save supplementary figure 3 (JPEG)
ggsave(
  plot = F1_non,
  filename = stringr::str_glue(outputs_dir, "FIG_S3.jpeg"),
  width = 14,
  height = 6,
  dpi = 500,
  units = "in")